###################################
## DR in an Era of Polarization  ##
## Dataset Replication for JOP   ## 
###################################

## LOAD PACKAGES ----
library(tidyverse) # tidy 
library(tidyselect)
library(forcats) # working with factors
library(haven) # for stata data 
library(foreign) # for stata data 
library(scales) # to rescale variables
library(stringr) # for policy variables
library(readxl) # for policy variables 

## LOAD CCES FILES FOR EACH YEAR 2006-2020 ----

# Read the raw CCES common-content file for every survey year, 2006-2020.
# The distributed format differs by year: Stata (.dta) files go through haven,
# the 2007 SPSS (.sav) file goes through foreign, and 2009/2019/2020 are CSV.
cces_2006 <- haven::read_dta("./CCES Files/cces_2006_common.dta")
# read.spss() is called with its defaults, so its result is converted to a
# data frame explicitly
cces_2007 <- as.data.frame(foreign::read.spss("./CCES Files/CCES07_OUTPUT.sav"))
cces_2008 <- haven::read_dta("./CCES Files/cces_2008_common.dta")
cces_2009 <- utils::read.csv("./CCES Files/cces_09_cummulative.csv")
cces_2010 <- haven::read_dta("./CCES Files/cces_2010_common_validated.dta")
cces_2011 <- haven::read_dta("./CCES Files/CCES11_Common_OUTPUT.dta")
cces_2012 <- haven::read_dta("./CCES Files/CCES12_Common_VV.dta")
cces_2013 <- haven::read_dta("./CCES Files/Common Content Data 13.dta")
cces_2014 <- haven::read_dta("./CCES Files/CCES14_Common_Content_Validated.dta")
cces_2015 <- haven::read_dta("./CCES Files/CCES15_Common_OUTPUT_Jan2016.dta")
cces_2016 <- haven::read_dta("./CCES Files/CCES16_Common_OUTPUT_Feb2018_VV.dta")
cces_2017 <- haven::read_dta("./CCES Files/Common Content Data 17.dta")
cces_2018 <- haven::read_dta("./CCES Files/cces18_common_vv.dta")
cces_2019 <- utils::read.csv("./CCES Files/CCES19_Common_OUTPUT_2.csv")
cces_2020 <- utils::read.csv("./CCES Files/CCES20_Common_OUTPUT.csv")

## VARIABLES OF INTEREST FOR EACH YEAR (text here, not code) ----

## 2006 
# V1000
# v4066 (party of gov't knowledge House)
# v4069 (party of gov't knowledge senate)
# v3012 (party + name recall governor)
# v3014 (party + name recall sen1), v3016 (party + name recall sen2) 
# v3018 (party + name recall rep)
# v4022 (self ideo placement)
# v4023 (Dem ideo placement), v4024 (Rep ideo placement)

## 2007 
# caseid
# CC06_V4066 (party of gov't knowledge House)
# CC06_V4069 (party of gov't knowledge senate)
# CC06_V3012 (party + name recall governor)
# CC06_V3014 (party + name recall sen1), CC06_V3016 (party + name recall sen2) 
# CC06_V3018 (party + name recall rep)
# CC06_V4022 (self ideo placement)
# CC06_V4023 (Dem ideo placement), CC06_V4024 (Rep ideo placement)
# CC3 (Congress approval)

## 2008 
# V100
# CC308a (party of gov't knowledge House)
# CC308b (party of gov't knowledge senate)
# CC309a (party + name recall governor)
# CC309b (party + name recall sen1), CC309c (party + name recall sen2) 
# CC309d (party + name recall rep)
# CC317a (self ideo placement)
# CC317b (Dem ideo placement), CC317c (Rep ideo placement)
# CC319 (What is the race or ethnicity of your member of the U. S. House of Representatives?)
# CC335cong (Congress approval)

## 2009 
# V100
# CC09_40E (party of gov't knowledge House)
# CC09_40F (party of gov't knowledge senate)
# CC09_40B (party + name recall governor)
# CC09_40C (party + name recall sen1), CC09_40D (party + name recall sen2) 
# CC09_40A (party + name recall rep)
# CC09_42A (self ideo placement)
# CC09_42D (Dem ideo placement), CC09_42E (Rep ideo placement)
# CC09_43D (Congress approval)

## 2010
# V100
# CC309a (party of gov't knowledge House)
# CC309b (party of gov't knowledge senate)
# CC310a (party + name recall governor)
# CC310b (party + name recall sen1), CC310c (party + name recall sen2) 
# CC310d (party + name recall rep)
# CC334A (self ideo placement)
# CC334D (Dem ideo placement), CC334E (Rep ideo placement)
# CC423c (What race or ethnicity is $CurrentHouseName?)
# CC308b (Congress approval)

## 2011
# V100
# CC321a (party of gov't knowledge House)
# CC321b (party of gov't knowledge senate)
# CC322a (party + name recall governor)
# CC322b (party + name recall sen1), CC322c (party + name recall sen2) 
# CC322d (party + name recall rep)
# CC342A (self ideo placement)
# CC342D (Dem ideo placement), CC342E (Rep ideo placement)
# CC320b (House approval)

## 2012 
# V101
# CC309a (party of gov't knowledge House)
# CC309b (party of gov't knowledge senate)
# CC310a (party + name recall governor)
# CC310b (party + name recall sen1), CC310c (party + name recall sen2) 
# CC310d (party + name recall rep)
# CC334A (self ideo placement)
# CC334E (Dem ideo placement), CC334F (Rep ideo placement)
# CC423c (Race or Ethnicity - $CurrentHouseName)
# CC308b (Congress approval)

## 2013 
# caseid
# CC13_310a (party of gov't knowledge House)
# CC13_310b (party of gov't knowledge senate)
# CC13_311a (party + name recall governor)
# CC13_311b (party + name recall sen1), CC13_311c (party + name recall sen2) 
# CC13_311d (party + name recall rep)
# CC334A (self ideo placement)
# CC334E (Dem ideo placement), CC334F (Rep ideo placement)
# CC312b (Congress approval)

## 2014 
# V101
# CC14_309a (party of gov't knowledge House)
# CC14_309b (party of gov't knowledge senate)
# CC14_310a (party + name recall governor)
# CC14_310b (party + name recall sen1), CC14_310c (party + name recall sen2) 
# CC14_310d (party + name recall rep)
# CC334A (self ideo placement)
# CC334K (Dem ideo placement), CC334L (Rep ideo placement)
# CC423c (Race or Ethnicity - $CurrentHouseName)
# CC14_308b (Congress approval)

## 2015 
# V101
# CC15_310a (party of gov't knowledge House)
# CC15_310b (party of gov't knowledge senate)
# CC15_311a (party + name recall governor)
# CC15_311b (party + name recall sen1), CC15_311c (party + name recall sen2) 
# CC15_311d (party + name recall rep)
# CC15_340a (self ideo placement)
# CC15_340d (Dem ideo placement), CC15_340e (Rep ideo placement)
# CC15_312b (House Approval)

## 2016
# V101
# CC16_321a (party of gov't knowledge House)
# CC16_321b (party of gov't knowledge senate)
# CC16_322a (party + name recall governor)
# CC16_322b (party + name recall sen1), CC16_322c (party + name recall sen2) 
# CC16_322d (party + name recall rep)
# CC16_340a (self ideo placement)
# CC16_340g (Dem ideo placement), CC16_340h (Rep ideo placement)
# CC16_423c (What is the race or ethnicity of the following candidates or politicians? Current House Rep)
# CC16_320b (Congress approval)

## 2017
# V101
# CC17_320b (party of gov't knowledge House)
# CC17_320c (party of gov't knowledge senate)
# CC17_321a (party + name recall governor)
# CC17_321b (party + name recall sen1), CC17_321c (party + name recall sen2) 
# CC17_321d (party + name recall rep)
# CC17_350a (self ideo placement)
# CC17_350d (Dem ideo placement), CC17_350e (Rep ideo placement)
# CC17_322b (House approval)

## 2018
# caseid
# CC18_309a (party of gov't knowledge House)
# CC18_309b (party of gov't knowledge senate)
# CC18_310a (party + name recall governor)
# CC18_310b (party + name recall sen1), CC18_310c (party + name recall sen2) 
# CC18_310d (party + name recall rep)
# CC18_334A (self ideo placement)
# CC18_334D (Dem ideo placement), CC18_334E (Rep ideo placement)
# CC18_423c (Race/ethnicity politicians - $CurrentHouseName)
# CC18_308b (Congress approval)

## 2019
# caseid
# CC19_309a (party of gov't knowledge House)
# CC19_309b (party of gov't knowledge senate)
# CC19_310a (party + name recall governor)
# CC19_310b (party + name recall sen1), CC19_310c (party + name recall sen2) 
# CC19_310d (party + name recall rep)
# CC19_334a (self ideo placement)
# CC19_334d (Dem ideo placement), CC19_334e (Rep ideo placement)
# CC19_308b (House approval)

## 2020
# caseid
# CC20_310a (party of gov't knowledge House)
# CC20_310b (party of gov't knowledge senate)
# CC20_311a (party + name recall governor)
# CC20_311b (party + name recall sen1), CC20_311c (party + name recall sen2) 
# CC20_311d (party + name recall rep)
# CC20_340a (self ideo placement)
# CC20_340e (Dem ideo placement), CC20_340f (Rep ideo placement)
# CC20_416c (Race/Ethnicity of politicians - $CurrentHouseName)
# CC20_320b (Congress approval)


## SELECTING VARIABLES TO RECODE PRE-MERGE ----

# This section creates subsets from each CCES file with the variables of interest (see section above on variables of interest)
# Comments on the first subset for 2006 apply to the years that follow as well 
# I then merge these subsets together once the variables have been renamed to make merge possible

# 2006 subset: pick the variables of interest and give them their harmonized
# names in a single select(), then tag the rows with the survey year.
# No perceived-race item is selected for 2006, so that column is created as NA.
cces_2006_sub <- cces_2006 %>%
  select(
    case_id = v1000,
    know_maj_house = v4066, know_maj_sen = v4069,
    recall_gov = v3012, recall_sen1 = v3014,
    recall_sen2 = v3016, recall_rep = v3018,
    ideo_self = v4022, ideo_d = v4023, ideo_r = v4024
  ) %>%
  mutate(year = 2006, perceive_race = NA_real_)

# 2007 subset: select and rename in one step, then add the survey year.
# No perceived-race item is selected for 2007, so that column is created as NA.
cces_2007_sub <- cces_2007 %>%
  select(
    case_id = caseid,
    know_maj_house = CC06_V4066, know_maj_sen = CC06_V4069,
    recall_gov = CC06_V3012, recall_sen1 = CC06_V3014,
    recall_sen2 = CC06_V3016, recall_rep = CC06_V3018,
    ideo_self = CC06_V4022, ideo_d = CC06_V4023, ideo_r = CC06_V4024,
    cong_approval = CC3
  ) %>%
  mutate(year = 2007, perceive_race = NA_real_)


# 2008 subset: select and rename in one step (perceived race comes from CC319),
# then add the survey year.
cces_2008_sub <- cces_2008 %>%
  select(
    case_id = V100,
    know_maj_house = CC308a, know_maj_sen = CC308b,
    recall_gov = CC309a, recall_sen1 = CC309b,
    recall_sen2 = CC309c, recall_rep = CC309d,
    ideo_self = CC317a, ideo_d = CC317b, ideo_r = CC317c,
    perceive_race = CC319, cong_approval = CC335cong
  ) %>%
  mutate(year = 2008)


# 2009 subset (column names are lower-case in this CSV): select and rename in
# one step, then add the survey year.
# No perceived-race item is selected for 2009, so that column is created as NA.
cces_2009_sub <- cces_2009 %>%
  select(
    case_id = v100,
    know_maj_house = cc09_40e, know_maj_sen = cc09_40f,
    recall_gov = cc09_40b, recall_sen1 = cc09_40c,
    recall_sen2 = cc09_40d, recall_rep = cc09_40a,
    ideo_self = cc09_42a, ideo_d = cc09_42d, ideo_r = cc09_42e,
    cong_approval = cc09_43d
  ) %>%
  mutate(year = 2009, perceive_race = NA_real_)

# 2010 subset: select and rename in one step (perceived race comes from CC423c),
# then add the survey year.
cces_2010_sub <- cces_2010 %>%
  select(
    case_id = V100,
    know_maj_house = CC309a, know_maj_sen = CC309b,
    recall_gov = CC310a, recall_sen1 = CC310b,
    recall_sen2 = CC310c, recall_rep = CC310d,
    ideo_self = CC334A, ideo_d = CC334D, ideo_r = CC334E,
    perceive_race = CC423c, cong_approval = CC308b
  ) %>%
  mutate(year = 2010)


# 2011 subset: select and rename in one step, then add the survey year.
# No perceived-race item is selected for 2011, so that column is created as NA.
cces_2011_sub <- cces_2011 %>%
  select(
    case_id = V100,
    know_maj_house = CC321a, know_maj_sen = CC321b,
    recall_gov = CC322a, recall_sen1 = CC322b,
    recall_sen2 = CC322c, recall_rep = CC322d,
    ideo_self = CC342A, ideo_d = CC342D, ideo_r = CC342E,
    cong_approval = CC320b
  ) %>%
  mutate(year = 2011, perceive_race = NA_real_)



# 2012 subset: select and rename in one step (perceived race comes from CC423c),
# then add the survey year.
cces_2012_sub <- cces_2012 %>%
  select(
    case_id = V101,
    know_maj_house = CC309a, know_maj_sen = CC309b,
    recall_gov = CC310a, recall_sen1 = CC310b,
    recall_sen2 = CC310c, recall_rep = CC310d,
    ideo_self = CC334A, ideo_d = CC334E, ideo_r = CC334F,
    perceive_race = CC423c, cong_approval = CC308b
  ) %>%
  mutate(year = 2012)


# 2013 subset: select and rename in one step, then add the survey year.
# No perceived-race item is selected for 2013, so that column is created as NA.
cces_2013_sub <- cces_2013 %>%
  select(
    case_id = caseid,
    know_maj_house = CC13_310a, know_maj_sen = CC13_310b,
    recall_gov = CC13_311a, recall_sen1 = CC13_311b,
    recall_sen2 = CC13_311c, recall_rep = CC13_311d,
    ideo_self = CC334A, ideo_d = CC334E, ideo_r = CC334F,
    cong_approval = CC312b
  ) %>%
  mutate(year = 2013, perceive_race = NA_real_)


# 2014 subset: select and rename in one step (perceived race comes from CC423c),
# then add the survey year.
cces_2014_sub <- cces_2014 %>%
  select(
    case_id = V101,
    know_maj_house = CC14_309a, know_maj_sen = CC14_309b,
    recall_gov = CC14_310a, recall_sen1 = CC14_310b,
    recall_sen2 = CC14_310c, recall_rep = CC14_310d,
    ideo_self = CC334A, ideo_d = CC334K, ideo_r = CC334L,
    perceive_race = CC423c, cong_approval = CC14_308b
  ) %>%
  mutate(year = 2014)


# 2015 subset: select and rename in one step, then add the survey year.
# No perceived-race item is selected for 2015, so that column is created as NA.
cces_2015_sub <- cces_2015 %>%
  select(
    case_id = V101,
    know_maj_house = CC15_310a, know_maj_sen = CC15_310b,
    recall_gov = CC15_311a, recall_sen1 = CC15_311b,
    recall_sen2 = CC15_311c, recall_rep = CC15_311d,
    ideo_self = CC15_340a, ideo_d = CC15_340d, ideo_r = CC15_340e,
    cong_approval = CC15_312b
  ) %>%
  mutate(year = 2015, perceive_race = NA_real_)


# 2016 subset: select and rename in one step (perceived race comes from
# CC16_423c), then add the survey year.
cces_2016_sub <- cces_2016 %>%
  select(
    case_id = V101,
    know_maj_house = CC16_321a, know_maj_sen = CC16_321b,
    recall_gov = CC16_322a, recall_sen1 = CC16_322b,
    recall_sen2 = CC16_322c, recall_rep = CC16_322d,
    ideo_self = CC16_340a, ideo_d = CC16_340g, ideo_r = CC16_340h,
    perceive_race = CC16_423c, cong_approval = CC16_320b
  ) %>%
  mutate(year = 2016)


# 2017 subset: select and rename in one step, then add the survey year.
# No perceived-race item is selected for 2017, so that column is created as NA.
cces_2017_sub <- cces_2017 %>%
  select(
    case_id = V101,
    know_maj_house = CC17_320b, know_maj_sen = CC17_320c,
    recall_gov = CC17_321a, recall_sen1 = CC17_321b,
    recall_sen2 = CC17_321c, recall_rep = CC17_321d,
    ideo_self = CC17_350a, ideo_d = CC17_350d, ideo_r = CC17_350e,
    cong_approval = CC17_322b
  ) %>%
  mutate(year = 2017, perceive_race = NA_real_)


# 2018 subset: select and rename in one step (perceived race comes from
# CC18_423c), then add the survey year.
cces_2018_sub <- cces_2018 %>%
  select(
    case_id = caseid,
    know_maj_house = CC18_309a, know_maj_sen = CC18_309b,
    recall_gov = CC18_310a, recall_sen1 = CC18_310b,
    recall_sen2 = CC18_310c, recall_rep = CC18_310d,
    ideo_self = CC18_334A, ideo_d = CC18_334D, ideo_r = CC18_334E,
    perceive_race = CC18_423c, cong_approval = CC18_308b
  ) %>%
  mutate(year = 2018)


# 2019 subset: select and rename in one step, then add the survey year.
# No perceived-race item is selected for 2019, so that column is created as NA.
cces_2019_sub <- cces_2019 %>%
  select(
    case_id = caseid,
    know_maj_house = CC19_309a, know_maj_sen = CC19_309b,
    recall_gov = CC19_310a, recall_sen1 = CC19_310b,
    recall_sen2 = CC19_310c, recall_rep = CC19_310d,
    ideo_self = CC19_334a, ideo_d = CC19_334d, ideo_r = CC19_334e,
    cong_approval = CC19_308b
  ) %>%
  mutate(year = 2019, perceive_race = NA_real_)

# 2020 subset: select and rename in one step (perceived race comes from
# CC20_416c), then add the survey year.
cces_2020_sub <- cces_2020 %>%
  select(
    case_id = caseid,
    know_maj_house = CC20_310a, know_maj_sen = CC20_310b,
    recall_gov = CC20_311a, recall_sen1 = CC20_311b,
    recall_sen2 = CC20_311c, recall_rep = CC20_311d,
    ideo_self = CC20_340a, ideo_d = CC20_340e, ideo_r = CC20_340f,
    perceive_race = CC20_416c, cong_approval = CC20_320b
  ) %>%
  mutate(year = 2020)

## RECODING VARIABLES (for consistency across years) ----
# This section recodes variables so that each year has values that correspond 
  # For example, the variable coding for race switches in some years, so I recode that to ensure values for race are the same across years

# I comment each recode on 2006, but these same recodes carry throughout and I add additional comments if there are additional recodes

# 2006 ----
# v4066: 1 - D, 2 - R, 3 - not sure 
# v4069: 1 - D, 2 - R, 3 - 50 D and 50 R, 4 - not sure 
# v3012: 1 - D, 2 - R, 3 - I, 4 - don't know 
# v3014: 1 - D, 2 - R, 3 - I, 4 - don't know 
# v3016: 1 - D, 2 - R, 3 - I, 4 - don't know 
# v3018: 1 - D, 2 - R, 3 - I, 4 - don't know 
# v4022: 0-100 scale, 101 don't know
# v4023: 0-100 scale, 101 don't know
# v4024:0-100 scale, 101 don't know

# Force the three ideology placements to plain numeric vectors. Conversion goes
# through character so that a factor would be read from its level text rather
# than its internal integer codes.
cces_2006_sub <- cces_2006_sub %>%
  mutate(across(c(ideo_self, ideo_d, ideo_r), ~ as.numeric(as.character(.x))))

# Harmonize the 2006 responses. Numeric party codes become text labels (any
# value not listed, including NA, becomes "."), and the 0-100 ideology
# placements are rescaled to 0-1. Each ideology item is kept in two versions:
# the base column puts "don't know" (101) at the midpoint, the *_na column
# treats it as missing.
cces_2006.2 <- cces_2006_sub %>%
  mutate(
    # No Congress-approval item is carried for 2006; create the column as NA.
    # NOTE(review): this is a numeric NA, while the 2007/2008 recodes store text
    # labels in cong_approval; confirm the merge step tolerates the difference.
    cong_approval = NA_real_,
    # Which party holds the majority in the House
    know_maj_house = case_when(
      know_maj_house == 1 ~ "D",
      know_maj_house == 2 ~ "R",
      know_maj_house == 3 ~ "not sure",
      TRUE ~ "."
    ),
    # Which party holds the majority in the Senate
    know_maj_sen = case_when(
      know_maj_sen == 1 ~ "D",
      know_maj_sen == 2 ~ "R",
      know_maj_sen == 3 ~ "50D-50R",
      know_maj_sen == 4 ~ "not sure",
      TRUE ~ "."
    ),
    # Party recall for the governor, both senators, and the representative all
    # use the same coding, so they are recoded together
    across(
      c(recall_gov, recall_sen1, recall_sen2, recall_rep),
      ~ case_when(
        .x == 1 ~ "D",
        .x == 2 ~ "R",
        .x == 3 ~ "I",
        .x == 4 ~ "not sure",
        TRUE ~ "."
      )
    ),
    # *_na versions must be built before the base columns are overwritten
    ideo_self_na = na_if(ideo_self, 101),
    ideo_d_na = na_if(ideo_d, 101),
    ideo_r_na = na_if(ideo_r, 101),
    # Base versions: "don't know" goes to the middle of the 0-100 scale
    ideo_self = case_when(ideo_self == 101 ~ 50, TRUE ~ ideo_self),
    ideo_d = case_when(ideo_d == 101 ~ 50, TRUE ~ ideo_d),
    ideo_r = case_when(ideo_r == 101 ~ 50, TRUE ~ ideo_r)
  ) %>%
  # Put all six ideology columns on a 0 to 1 scale
  mutate(across(
    c(ideo_self, ideo_self_na, ideo_d, ideo_d_na, ideo_r, ideo_r_na),
    ~ .x / 100
  ))

# 2007 ----
# CC06_V4066: 1 - D, 2 - R, 3 - not sure, 8 - skipped, 9 - not asked 
# CC06_V4069: 1 - D, 2 - R, 3 - 50 D and 50 R, 4 - not sure, 8 - skipped, 9 - not asked 
# CC06_V3012: 1 - D, 2 - R, 3 - I, 4 - don't know, 8 - skipped, 9 - not asked 
# CC06_V3014: 1 - D, 2 - R, 3 - I, 4 - don't know, 8 - skipped, 9 - not asked 
# CC06_V3016: 1 - D, 2 - R, 3 - I, 4 - don't know, 8 - skipped, 9 - not asked 
# CC06_V3018: 1 - D, 2 - R, 3 - I, 4 - don't know, 8 - skipped, 9 - not asked 
# CC06_V4022: 0-100 scale, 101 don't know, 998 - skipped, 999 - not asked
# CC06_V4023: 0-100 scale, 101 don't know, 998 - skipped, 999 - not asked
# CC06_V4024: 0-100 scale, 101 don't know, 998 - skipped, 999 - not asked
# CC3: "Somewhat approve" - "somewhat a", "Somewhat disapprove" - "somewhat d", "Strongly approve" - "strongly a",
# "Strongly disapprove" - "strongly d", "Not sure" - "not sure", "Skipped" - "skipped", 
# "Neither approve nor disapprove" - "neither a nor d"

# Force the three ideology placements to plain numeric vectors. Conversion goes
# through character so that a factor is read from its level text rather than
# its internal integer codes.
cces_2007_sub <- cces_2007_sub %>%
  mutate(across(c(ideo_self, ideo_d, ideo_r), ~ as.numeric(as.character(.x))))

# Harmonize the 2007 responses. The categorical items are matched on their text
# labels and mapped to the short labels used across years; anything not listed
# (including "Skipped", "Not Asked", and NA) becomes ".". The 0-100 ideology
# placements are rescaled to 0-1 and kept in two versions: the base column puts
# "don't know" (101) at the midpoint, the *_na column treats it as missing.
cces_2007.2 <- cces_2007_sub %>%
  mutate(
    # NOTE(review): the header comment for CC3 lists "neither a nor d", but the
    # value written here is "neither a or d"; confirm which one later code expects.
    cong_approval = case_when(
      cong_approval == "Somewhat approve" ~ "somewhat a",
      cong_approval == "Somewhat disapprove" ~ "somewhat d",
      cong_approval == "Strongly approve" ~ "strongly a",
      cong_approval == "Strongly disapprove" ~ "strongly d",
      cong_approval == "Not sure" ~ "not sure",
      cong_approval == "Skipped" ~ "skipped",
      cong_approval == "Neither approve nor disapprove" ~ "neither a or d",
      TRUE ~ "."
    ),
    # Which party holds the majority in the House
    know_maj_house = case_when(
      know_maj_house == "The Democrats" ~ "D",
      know_maj_house == "The Republicans" ~ "R",
      know_maj_house == "Not Sure" ~ "not sure",
      TRUE ~ "."
    ),
    # Which party holds the majority in the Senate
    know_maj_sen = case_when(
      know_maj_sen == "The Democrats" ~ "D",
      know_maj_sen == "The Republicans" ~ "R",
      know_maj_sen == "There will be 50 Democrats and 50 Republicans" ~ "50D-50R",
      know_maj_sen == "Not Sure" ~ "not sure",
      TRUE ~ "."
    ),
    # Party recall for the governor, both senators, and the representative all
    # use the same labels, so they are recoded together
    across(
      c(recall_gov, recall_sen1, recall_sen2, recall_rep),
      ~ case_when(
        .x == "Democrat" ~ "D",
        .x == "Republican" ~ "R",
        .x == "Independent" ~ "I",
        .x == "Don't Know" ~ "not sure",
        TRUE ~ "."
      )
    ),
    # *_na versions: every code of 101 or above (don't know, skipped, not asked)
    # is missing. Built before the base columns are overwritten.
    ideo_self_na = case_when(ideo_self >= 101 ~ NA_real_, TRUE ~ ideo_self),
    ideo_d_na = case_when(ideo_d >= 101 ~ NA_real_, TRUE ~ ideo_d),
    ideo_r_na = case_when(ideo_r >= 101 ~ NA_real_, TRUE ~ ideo_r),
    # Base versions: "don't know" (101) goes to the scale midpoint and other
    # non-response codes (998 skipped, 999 not asked) become NA. Previously only
    # 101 was handled, so 998/999 survived as 9.98/9.99 after rescaling.
    ideo_self = case_when(
      ideo_self == 101 ~ 50,
      ideo_self > 101 ~ NA_real_,
      TRUE ~ ideo_self
    ),
    ideo_d = case_when(
      ideo_d == 101 ~ 50,
      ideo_d > 101 ~ NA_real_,
      TRUE ~ ideo_d
    ),
    ideo_r = case_when(
      ideo_r == 101 ~ 50,
      ideo_r > 101 ~ NA_real_,
      TRUE ~ ideo_r
    )
  ) %>%
  # Put all six ideology columns on a 0 to 1 scale
  mutate(
    ideo_self = ideo_self / 100, ideo_self_na = ideo_self_na / 100,
    ideo_d = ideo_d / 100, ideo_d_na = ideo_d_na / 100,
    ideo_r = ideo_r / 100, ideo_r_na = ideo_r_na / 100
  )


# 2008 ----
# CC308a: 1 - R, 2 - D, 3 - neither, 4 - not sure, 8 - skipped, 9 - not asked 
# CC308b: 1 - R, 2 - D, 3 - neither, 4 - not sure, 8 - skipped, 9 - not asked 
# CC309a: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked 
# CC309b: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked 
# CC309c: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked 
# CC309d: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked 
# CC317a: 0-100 scale, 997 - not sure, 998 - skipped, 999 - not asked
# CC317b: 0-100 scale, 997 - not sure, 998 - skipped, 999 - not asked
# CC317c: 0-100 scale, 997 - not sure, 998 - skipped, 999 - not asked
# perceive_race: 1 - white, 2 - black, 3 - hispanic, 4 - other, 5 - not sure, 8 - skipped, 9 - not asked 
# CC335cong: 2 - "somewhat a", 3 - "somewhat d", 1 - "strongly a",
# 4 - "strongly d", 5 - "not sure", 8 - "skipped", 9 - "not asked"

# Force the three ideology placements to plain numeric vectors. Conversion goes
# through character so that a factor would be read from its level text rather
# than its internal integer codes.
cces_2008_sub <- cces_2008_sub %>%
  mutate(across(c(ideo_self, ideo_d, ideo_r), ~ as.numeric(as.character(.x))))

# Harmonize the 2008 responses. Numeric codes for the categorical items become
# the short text labels used across years; any value not listed (including
# 8 = skipped, 9 = not asked, and NA) becomes ".". The 0-100 ideology
# placements are rescaled to 0-1 and kept in two versions: the base column puts
# "not sure" at the midpoint, the *_na column treats it as missing.
cces_2008.2 <- cces_2008_sub %>%
  mutate(
    cong_approval = case_when(
      cong_approval == 2 ~ "somewhat a",
      cong_approval == 3 ~ "somewhat d",
      cong_approval == 1 ~ "strongly a",
      cong_approval == 4 ~ "strongly d",
      cong_approval == 5 ~ "not sure",
      cong_approval == 8 ~ "skipped",
      cong_approval == 9 ~ "not asked",
      TRUE ~ "."
    ),
    # House and Senate majority-party items share one coding in 2008
    across(
      c(know_maj_house, know_maj_sen),
      ~ case_when(
        .x == 1 ~ "R",
        .x == 2 ~ "D",
        .x == 3 ~ "neither",
        .x == 4 ~ "not sure",
        TRUE ~ "."
      )
    ),
    # Party recall for the governor, both senators, and the representative
    across(
      c(recall_gov, recall_sen1, recall_sen2, recall_rep),
      ~ case_when(
        .x == 1 ~ "not heard of",
        .x == 2 ~ "R",
        .x == 3 ~ "D",
        .x == 4 ~ "other/I",
        .x == 5 ~ "not sure",
        TRUE ~ "."
      )
    ),
    # For 2008, perceived race doesn't include "Asian", so "other" and
    # "not sure" are shifted up by one to match the numbering of other years
    # (the original repeated the 4 -> 5 arm; the duplicate was unreachable)
    perceive_race = case_when(
      perceive_race == 1 ~ 1,
      perceive_race == 2 ~ 2,
      perceive_race == 3 ~ 3,
      perceive_race == 4 ~ 5,
      perceive_race == 5 ~ 6,
      perceive_race == 8 ~ 8,
      perceive_race == 9 ~ 9,
      TRUE ~ NA_real_
    ),
    # *_na versions: every code of 101 or above is missing. Built before the
    # base columns are overwritten.
    ideo_self_na = case_when(ideo_self >= 101 ~ NA_real_, TRUE ~ ideo_self),
    ideo_d_na = case_when(ideo_d >= 101 ~ NA_real_, TRUE ~ ideo_d),
    ideo_r_na = case_when(ideo_r >= 101 ~ NA_real_, TRUE ~ ideo_r),
    # Base versions: "not sure" goes to the scale midpoint, other non-response
    # codes (998 skipped, 999 not asked) become NA. The documented 2008
    # "not sure" code is 997; the earlier test for 101 alone never matched it,
    # so 997-999 survived as 9.97-9.99 after rescaling. 101 is still accepted
    # so any value the old recode would have caught is treated the same way.
    ideo_self = case_when(
      ideo_self %in% c(101, 997) ~ 50,
      ideo_self > 100 ~ NA_real_,
      TRUE ~ ideo_self
    ),
    ideo_d = case_when(
      ideo_d %in% c(101, 997) ~ 50,
      ideo_d > 100 ~ NA_real_,
      TRUE ~ ideo_d
    ),
    ideo_r = case_when(
      ideo_r %in% c(101, 997) ~ 50,
      ideo_r > 100 ~ NA_real_,
      TRUE ~ ideo_r
    )
  ) %>%
  # Put all six ideology columns on a 0 to 1 scale
  mutate(
    ideo_self = ideo_self / 100, ideo_self_na = ideo_self_na / 100,
    ideo_d = ideo_d / 100, ideo_d_na = ideo_d_na / 100,
    ideo_r = ideo_r / 100, ideo_r_na = ideo_r_na / 100
  )


# 2009 ----
# cc09_40e: 1 - D, 2 - R, 3 - not sure, 8 - skipped, 9 - not asked 
# cc09_40f: 1 - D, 2 - R, 3 - not sure, 8 - skipped, 9 - not asked 
# cc09_40b: 1 - D, 2 - R, 3 - not sure, 8 - skipped, 9 - not asked 
# cc09_40c: 1 - D, 2 - R, 3 - not sure, 8 - skipped, 9 - not asked 
# cc09_40d: 1 - D, 2 - R, 3 - not sure, 8 - skipped, 9 - not asked 
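# cc09_40a: 1 - D, 2 - R, 3 - not sure, 8 - skipped, 9 - not asked 
# NOTE(review): the six items above are documented as 1 - D, 2 - R, but the 2009
# recode maps 1 -> "R" and 2 -> "D"; confirm the coding against the 2009 codebook.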
# cc09_42a: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - skipped, 9 - not asked 
# cc09_42d: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - skipped, 9 - not asked 
# cc09_42e: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - skipped, 9 - not asked 
# cc09_43d: 1 - "strongly a", 2 - "somewhat a", 3 - "somewhat d", 4 - "strongly d", 5 - "never heard", 8 - "skipped", 9 - "not asked"

# Force the three ideology placements to plain numeric vectors. Conversion goes
# through character so that a factor would be read from its level text rather
# than its internal integer codes.
cces_2009_sub <- cces_2009_sub %>%
  mutate(across(c(ideo_self, ideo_d, ideo_r), ~ as.numeric(as.character(.x))))

# Recode Values
# Turns the numeric response codes of the 2009 items into labels and builds
# the ideology placement columns.
#   * cong_approval: approval labels; 8 / 9 become "skipped" / "not asked" and
#     any other value (including NA) becomes ".".
#   * know_maj_* and recall_*: 1 -> "R", 2 -> "D", 3 -> "not sure"; everything
#     else (8, 9, NA, unexpected codes) becomes ".".
#   * ideo_*_na and ideo_*: the 1-7 placements with 8 (skipped) and 9 (not
#     asked) set to NA.
# NOTE(review): the codebook comment for cc09_40a-f lists 1 - D, 2 - R, but the
# mapping kept below is 1 -> "R", 2 -> "D". Confirm against the 2009 codebook.
cces_2009.2 <- cces_2009_sub %>%
  mutate(
    cong_approval = case_when(
      cong_approval == 1 ~ "strongly a",
      cong_approval == 2 ~ "somewhat a",
      cong_approval == 3 ~ "somewhat d",
      cong_approval == 4 ~ "strongly d",
      cong_approval == 5 ~ "never heard",
      cong_approval == 8 ~ "skipped",
      cong_approval == 9 ~ "not asked",
      TRUE ~ "."
    ),
    # The six party items share one coding, so they are recoded together.
    # Codes 8 and 9 need no arm of their own: the fallback already yields ".".
    across(
      c(know_maj_house, know_maj_sen,
        recall_gov, recall_sen1, recall_sen2, recall_rep),
      function(code) {
        case_when(
          code == 1 ~ "R",
          code == 2 ~ "D",
          code == 3 ~ "not sure",
          TRUE ~ "."
        )
      }
    ),
    # Skipped (8) and not asked (9) as NA.
    across(
      c(ideo_self, ideo_d, ideo_r),
      function(placement) replace(placement, placement %in% c(8, 9), NA),
      .names = "{.col}_na"
    ),
    # 2009 has no "don't know" code, so there is nothing to move to the
    # midpoint. The base columns still have to drop 8 / 9: left in place, the
    # min-max rescale that follows would treat them as values beyond
    # "very con" (7) and compress the real 1-7 placements.
    ideo_self = ideo_self_na,
    ideo_d = ideo_d_na,
    ideo_r = ideo_r_na
  )
# ideo_self, ideo_d and ideo_r therefore equal their *_na counterparts in 2009.

# Min-max rescale the six 2009 ideology columns onto 0-1, rounded to two
# decimals. The bounds are each column's own observed minimum and maximum
# (NAs ignored), so any out-of-range code still present in a column stretches
# that column's scale.
cces_2009.2 <- cces_2009.2 %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r, ideo_self_na, ideo_d_na, ideo_r_na),
    function(placement) {
      lowest <- min(placement, na.rm = TRUE)
      highest <- max(placement, na.rm = TRUE)
      round((placement - lowest) / (highest - lowest), 2)
    }
  ))


# 2010 ----
# CC309a: 1 - R, 2 - D, 3 - neither, 4 - not sure, 8 - skipped, 9 - not asked 
# CC309b: 1 - R, 2 - D, 3 - neither, 4 - not sure, 8 - skipped, 9 - not asked 
# CC310a: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked 
# CC310b: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked 
# CC310c: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked 
# CC310d: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked 
# CC334A: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - skipped, 9 - not asked 
# CC334D: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - skipped, 9 - not asked 
# CC334E: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - skipped, 9 - not asked 
# perceive_race: 1 - white, 2 - black, 3 - hispanic, 4 - asian, 5 - other, 6 - not sure, 8 - skipped, 9 - not asked  
# CC308b: 1 - "strongly a", 2 - "somewhat a", 3 - "somewhat d", 4 - "strongly d", 5 - "not sure", 8 - "skipped", 0 - "not asked"

# Coerce the three 2010 ideology placements to plain numerics. Converting to
# character first means a factor column would be parsed from its labels
# rather than from its internal integer codes.
cces_2010_sub <- cces_2010_sub %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r),
    function(placement) as.numeric(as.character(placement))
  ))

# Recode Values
# Turns the numeric response codes of the 2010 items into labels and builds
# the ideology placement columns.
#   * cong_approval: approval labels; 8 / 9 become "skipped" / "not asked" and
#     any other value (including NA) becomes ".".
#   * know_maj_*: 1 -> "R", 2 -> "D", 3 -> "neither", 4 -> "not sure".
#   * recall_*: 1 -> "not heard of", 2 -> "R", 3 -> "D", 4 -> "other/I",
#     5 -> "not sure".
#     For both families everything else (8, 9, NA, unexpected codes) is ".".
#   * ideo_*_na and ideo_*: the 1-7 placements with 8 (skipped) and 9 (not
#     asked) set to NA.
cces_2010.2 <- cces_2010_sub %>%
  mutate(
    cong_approval = case_when(
      cong_approval == 1 ~ "strongly a",
      cong_approval == 2 ~ "somewhat a",
      cong_approval == 3 ~ "somewhat d",
      cong_approval == 4 ~ "strongly d",
      cong_approval == 5 ~ "not sure",
      cong_approval == 8 ~ "skipped",
      cong_approval == 9 ~ "not asked",
      TRUE ~ "."
    ),
    # Codes 8 and 9 need no arm of their own: the fallback already yields ".".
    across(
      c(know_maj_house, know_maj_sen),
      function(code) {
        case_when(
          code == 1 ~ "R",
          code == 2 ~ "D",
          code == 3 ~ "neither",
          code == 4 ~ "not sure",
          TRUE ~ "."
        )
      }
    ),
    across(
      c(recall_gov, recall_sen1, recall_sen2, recall_rep),
      function(code) {
        case_when(
          code == 1 ~ "not heard of",
          code == 2 ~ "R",
          code == 3 ~ "D",
          code == 4 ~ "other/I",
          code == 5 ~ "not sure",
          TRUE ~ "."
        )
      }
    ),
    # Skipped (8) and not asked (9) as NA.
    across(
      c(ideo_self, ideo_d, ideo_r),
      function(placement) replace(placement, placement %in% c(8, 9), NA),
      .names = "{.col}_na"
    ),
    # 2010 has no "don't know" code, so there is nothing to move to the
    # midpoint. The base columns still have to drop 8 / 9: left in place, the
    # min-max rescale that follows would treat them as values beyond
    # "very con" (7) and compress the real 1-7 placements.
    ideo_self = ideo_self_na,
    ideo_d = ideo_d_na,
    ideo_r = ideo_r_na
  )
# ideo_self, ideo_d and ideo_r therefore equal their *_na counterparts in 2010.


# Min-max rescale the six 2010 ideology columns onto 0-1, rounded to two
# decimals. The bounds are each column's own observed minimum and maximum
# (NAs ignored), so any out-of-range code still present in a column stretches
# that column's scale.
cces_2010.2 <- cces_2010.2 %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r, ideo_self_na, ideo_d_na, ideo_r_na),
    function(placement) {
      lowest <- min(placement, na.rm = TRUE)
      highest <- max(placement, na.rm = TRUE)
      round((placement - lowest) / (highest - lowest), 2)
    }
  ))


# 2011 ----
# CC321a: 1 - R, 2 - D, 3 - neither, 4 - not sure, 8 - skipped, 9 - not asked 
# CC321b: 1 - R, 2 - D, 3 - neither, 4 - not sure, 8 - skipped, 9 - not asked 
# CC322a: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC322b: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC322c: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC322d: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC342A: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 98 - skipped, 99 - not asked 
# CC342D: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 98 - skipped, 99 - not asked 
# CC342E: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 98 - skipped, 99 - not asked 
# CC320b: 1 - "strongly a", 2 - "somewhat a", 3 - "somewhat d", 4 - "strongly d", 5 - "not sure", 8 - "skipped", 0 - "not asked"

# Coerce the three 2011 ideology placements to plain numerics. Converting to
# character first means a factor column would be parsed from its labels
# rather than from its internal integer codes.
cces_2011_sub <- cces_2011_sub %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r),
    function(placement) as.numeric(as.character(placement))
  ))

# Recode Values
# Turns the numeric response codes of the 2011 items into labels and builds
# the ideology placement columns.
#   * cong_approval: approval labels; 8 / 9 become "skipped" / "not asked" and
#     any other value (including NA) becomes ".".
#   * know_maj_*: 1 -> "R", 2 -> "D", 3 -> "neither", 4 -> "not sure".
#   * recall_*: 1 -> "not heard of", 2 -> "R", 3 -> "D", 4 -> "other/I",
#     5 -> "not sure".
#     For both families everything else (8, 9, NA, unexpected codes) is ".".
#   * ideo_*_na and ideo_*: the 1-7 placements with every non-response code
#     set to NA.
cces_2011.2 <- cces_2011_sub %>%
  mutate(
    cong_approval = case_when(
      cong_approval == 1 ~ "strongly a",
      cong_approval == 2 ~ "somewhat a",
      cong_approval == 3 ~ "somewhat d",
      cong_approval == 4 ~ "strongly d",
      cong_approval == 5 ~ "not sure",
      cong_approval == 8 ~ "skipped",
      cong_approval == 9 ~ "not asked",
      TRUE ~ "."
    ),
    # Codes 8 and 9 need no arm of their own: the fallback already yields ".".
    across(
      c(know_maj_house, know_maj_sen),
      function(code) {
        case_when(
          code == 1 ~ "R",
          code == 2 ~ "D",
          code == 3 ~ "neither",
          code == 4 ~ "not sure",
          TRUE ~ "."
        )
      }
    ),
    across(
      c(recall_gov, recall_sen1, recall_sen2, recall_rep),
      function(code) {
        case_when(
          code == 1 ~ "not heard of",
          code == 2 ~ "R",
          code == 3 ~ "D",
          code == 4 ~ "other/I",
          code == 5 ~ "not sure",
          TRUE ~ "."
        )
      }
    ),
    # Skipped and not asked as NA. The codebook notes for the CC342 items give
    # 98 / 99 as the skipped / not-asked codes, while this script previously
    # tested only 8 / 9. None of the four is a valid 1-7 placement, so all of
    # them are treated as non-response.
    # NOTE(review): confirm the actual codes against the 2011 codebook.
    across(
      c(ideo_self, ideo_d, ideo_r),
      function(placement) {
        replace(placement, placement %in% c(8, 9, 98, 99), NA)
      },
      .names = "{.col}_na"
    ),
    # 2011 has no "don't know" code, so there is nothing to move to the
    # midpoint. The base columns still have to drop the non-response codes:
    # left in place, the min-max rescale that follows would treat them as
    # values beyond "very con" (7) and compress the real 1-7 placements.
    ideo_self = ideo_self_na,
    ideo_d = ideo_d_na,
    ideo_r = ideo_r_na
  )
# ideo_self, ideo_d and ideo_r therefore equal their *_na counterparts in 2011.

# Min-max rescale the six 2011 ideology columns onto 0-1, rounded to two
# decimals. The bounds are each column's own observed minimum and maximum
# (NAs ignored), so any out-of-range code still present in a column stretches
# that column's scale.
cces_2011.2 <- cces_2011.2 %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r, ideo_self_na, ideo_d_na, ideo_r_na),
    function(placement) {
      lowest <- min(placement, na.rm = TRUE)
      highest <- max(placement, na.rm = TRUE)
      round((placement - lowest) / (highest - lowest), 2)
    }
  ))


# 2012 ----
# CC309a: 1 - R, 2 - D, 3 - neither, 4 - not sure, 8 - skipped, 9 - not asked 
# CC309b: 1 - R, 2 - D, 3 - neither, 4 - not sure, 8 - skipped, 9 - not asked 
# CC310a: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC310b: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC310c: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC310d: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC334A: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure, 98 - skipped, 99 - not asked 
# CC334E: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure, 98 - skipped, 99 - not asked 
# CC334F: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure, 98 - skipped, 99 - not asked 
# perceive_race: 1 - white, 2 - black, 3 - hispanic, 4 - asian, 5 - other, 6 - not sure, 8 - skipped, 9 - not asked  
# CC308b: 1 - "strongly a", 2 - "somewhat a", 3 - "somewhat d", 4 - "strongly d", 5 - "not sure", 8 - "skipped", 0 - "not asked"

# Coerce the three 2012 ideology placements to plain numerics. Converting to
# character first means a factor column would be parsed from its labels
# rather than from its internal integer codes.
cces_2012_sub <- cces_2012_sub %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r),
    function(placement) as.numeric(as.character(placement))
  ))

# Recode Values
# Turns the numeric response codes of the 2012 items into labels and builds
# the ideology placement columns.
#   * cong_approval: approval labels; 8 / 9 become "skipped" / "not asked" and
#     any other value (including NA) becomes ".".
#   * know_maj_*: 1 -> "R", 2 -> "D", 3 -> "neither", 4 -> "not sure".
#   * recall_*: 1 -> "not heard of", 2 -> "R", 3 -> "D", 4 -> "other/I",
#     5 -> "not sure".
#     For both families everything else (8, 9, NA, unexpected codes) is ".".
#   * ideo_*_na: 1-7 placements; not sure (8), skipped (98) and not asked (99)
#     are NA.
#   * ideo_*: 1-7 placements; not sure (8) is moved to the midpoint (4),
#     skipped (98) and not asked (99) are NA.
cces_2012.2 <- cces_2012_sub %>%
  mutate(
    cong_approval = case_when(
      cong_approval == 1 ~ "strongly a",
      cong_approval == 2 ~ "somewhat a",
      cong_approval == 3 ~ "somewhat d",
      cong_approval == 4 ~ "strongly d",
      cong_approval == 5 ~ "not sure",
      cong_approval == 8 ~ "skipped",
      cong_approval == 9 ~ "not asked",
      TRUE ~ "."
    ),
    # Codes 8 and 9 need no arm of their own: the fallback already yields ".".
    across(
      c(know_maj_house, know_maj_sen),
      function(code) {
        case_when(
          code == 1 ~ "R",
          code == 2 ~ "D",
          code == 3 ~ "neither",
          code == 4 ~ "not sure",
          TRUE ~ "."
        )
      }
    ),
    across(
      c(recall_gov, recall_sen1, recall_sen2, recall_rep),
      function(code) {
        case_when(
          code == 1 ~ "not heard of",
          code == 2 ~ "R",
          code == 3 ~ "D",
          code == 4 ~ "other/I",
          code == 5 ~ "not sure",
          TRUE ~ "."
        )
      }
    ),
    # Don't know / skipped / not asked as NA. Each column is tested against
    # its own values; the party placements previously checked ideo_self for
    # code 99, which blanked valid party placements and let a party's own 99
    # through.
    across(
      c(ideo_self, ideo_d, ideo_r),
      function(placement) {
        replace(placement, placement %in% c(8, 98, 99), NA)
      },
      .names = "{.col}_na"
    ),
    # Don't know as the middle value; other non-response as NA. Leaving 98 / 99
    # in place would let the min-max rescale that follows use them as the top
    # of the scale and compress the real 1-7 placements.
    across(
      c(ideo_self, ideo_d, ideo_r),
      function(placement) {
        case_when(
          placement == 8 ~ 4,
          placement %in% c(98, 99) ~ NA_real_,
          TRUE ~ placement
        )
      }
    )
  )

# Min-max rescale the six 2012 ideology columns onto 0-1, rounded to two
# decimals. The bounds are each column's own observed minimum and maximum
# (NAs ignored), so any out-of-range code still present in a column stretches
# that column's scale.
cces_2012.2 <- cces_2012.2 %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r, ideo_self_na, ideo_d_na, ideo_r_na),
    function(placement) {
      lowest <- min(placement, na.rm = TRUE)
      highest <- max(placement, na.rm = TRUE)
      round((placement - lowest) / (highest - lowest), 2)
    }
  ))


# 2013 ----
# CC13_310a: 1 - R, 2 - D, 3 - neither, 4 - not sure, 8 - skipped, 9 - not asked 
# CC13_310b: 1 - R, 2 - D, 3 - neither, 4 - not sure, 8 - skipped, 9 - not asked 
# CC13_311a: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC13_311b: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC13_311c: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC13_311d: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
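# CC334A: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure, 9 - not asked (this note originally also listed 8 as "skipped"; verify the skip code in the 2013 codebook)
# CC334E: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure, 9 - not asked (this note originally also listed 8 as "skipped"; verify the skip code in the 2013 codebook)
# CC334F: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure, 9 - not asked (this note originally also listed 8 as "skipped"; verify the skip code in the 2013 codebook)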
# CC312b: 1 - "strongly a", 2 - "somewhat a", 3 - "somewhat d", 4 - "strongly d", 5 - "not sure", 8 - "skipped", 0 - "not asked"

# Coerce the three 2013 ideology placements to plain numerics. Converting to
# character first means a factor column would be parsed from its labels
# rather than from its internal integer codes.
cces_2013_sub <- cces_2013_sub %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r),
    function(placement) as.numeric(as.character(placement))
  ))

# Recode Values
# Turns the numeric response codes of the 2013 items into labels and builds
# the ideology placement columns.
#   * cong_approval: approval labels; 8 / 9 become "skipped" / "not asked" and
#     any other value (including NA) becomes ".".
#   * know_maj_*: 1 -> "R", 2 -> "D", 3 -> "neither", 4 -> "not sure".
#   * recall_*: 1 -> "not heard of", 2 -> "R", 3 -> "D", 4 -> "other/I",
#     5 -> "not sure".
#     For both families everything else (8, 9, NA, unexpected codes) is ".".
#   * ideo_*_na: 1-7 placements; don't know (8) and not asked (9) are NA.
#   * ideo_*: 1-7 placements; don't know (8) is moved to the midpoint (4) and
#     not asked (9) is NA.
cces_2013.2 <- cces_2013_sub %>%
  mutate(
    cong_approval = case_when(
      cong_approval == 1 ~ "strongly a",
      cong_approval == 2 ~ "somewhat a",
      cong_approval == 3 ~ "somewhat d",
      cong_approval == 4 ~ "strongly d",
      cong_approval == 5 ~ "not sure",
      cong_approval == 8 ~ "skipped",
      cong_approval == 9 ~ "not asked",
      TRUE ~ "."
    ),
    # Codes 8 and 9 need no arm of their own: the fallback already yields ".".
    across(
      c(know_maj_house, know_maj_sen),
      function(code) {
        case_when(
          code == 1 ~ "R",
          code == 2 ~ "D",
          code == 3 ~ "neither",
          code == 4 ~ "not sure",
          TRUE ~ "."
        )
      }
    ),
    across(
      c(recall_gov, recall_sen1, recall_sen2, recall_rep),
      function(code) {
        case_when(
          code == 1 ~ "not heard of",
          code == 2 ~ "R",
          code == 3 ~ "D",
          code == 4 ~ "other/I",
          code == 5 ~ "not sure",
          TRUE ~ "."
        )
      }
    ),
    # Don't know / not asked as NA.
    across(
      c(ideo_self, ideo_d, ideo_r),
      function(placement) replace(placement, placement %in% c(8, 9), NA),
      .names = "{.col}_na"
    ),
    # Don't know as the middle value; not asked as NA, applied identically to
    # all three placements. Previously ideo_self moved code 9 to the midpoint
    # while ideo_d / ideo_r kept it as a raw 9, which the min-max rescale that
    # follows would have used as the top of the scale.
    # NOTE(review): this changes ideo_self for respondents coded 9 (midpoint
    # before, NA now). Confirm the meaning of code 9 in the 2013 codebook.
    across(
      c(ideo_self, ideo_d, ideo_r),
      function(placement) {
        case_when(
          placement == 8 ~ 4,
          placement == 9 ~ NA_real_,
          TRUE ~ placement
        )
      }
    )
  )

# Min-max rescale the six 2013 ideology columns onto 0-1, rounded to two
# decimals. The bounds are each column's own observed minimum and maximum
# (NAs ignored), so any out-of-range code still present in a column stretches
# that column's scale.
cces_2013.2 <- cces_2013.2 %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r, ideo_self_na, ideo_d_na, ideo_r_na),
    function(placement) {
      lowest <- min(placement, na.rm = TRUE)
      highest <- max(placement, na.rm = TRUE)
      round((placement - lowest) / (highest - lowest), 2)
    }
  ))

# 2014 ---- 
# CC14_309a: 1 - R, 2 - D, 3 - neither, 4 - not sure, . - skipped
# CC14_309b: 1 - R, 2 - D, 3 - neither, 4 - not sure, . - skipped
# CC14_310a: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, . - skipped
# CC14_310b: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, . - skipped
# CC14_310c: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, . - skipped
# CC14_310d: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, . - skipped
# CC334A: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure, . - skipped
# CC334K: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure, . - skipped
# CC334L: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure, . - skipped
# perceive_race: 1 - white, 2 - black, 3 - hispanic, 4 - asian, 5 - other, 6 - not sure, 8 - skipped, 9 - not asked  
# CC14_308b: 1 - "strongly a", 2 - "somewhat a", 3 - "somewhat d", 4 - "strongly d", 5 - "not sure", 8 - "skipped", 0 - "not asked"

# Coerce the three 2014 ideology placements to plain numerics. Converting to
# character first means a factor column would be parsed from its labels
# rather than from its internal integer codes.
cces_2014_sub <- cces_2014_sub %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r),
    function(placement) as.numeric(as.character(placement))
  ))

# Recode Values
# Replaces numeric survey codes with text labels and builds both ideology
# variants. Any code without an explicit label (including NA) becomes ".".
cces_2014.2 <- cces_2014_sub %>%
  mutate(
    # NOTE(review): the year header lists 0 as the "not asked" code while the
    # match below uses 9 -- confirm against the codebook.
    cong_approval = case_when(
      cong_approval == 1 ~ "strongly a",
      cong_approval == 2 ~ "somewhat a",
      cong_approval == 3 ~ "somewhat d",
      cong_approval == 4 ~ "strongly d",
      cong_approval == 5 ~ "not sure",
      cong_approval == 8 ~ "skipped",
      cong_approval == 9 ~ "not asked",
      TRUE ~ "."
    ),
    # House / Senate majority-party knowledge items share one coding
    across(
      c(know_maj_house, know_maj_sen),
      ~ case_when(
        .x == 1 ~ "R",
        .x == 2 ~ "D",
        .x == 3 ~ "neither",
        .x == 4 ~ "not sure",
        TRUE ~ "."
      )
    ),
    # party-recall items (governor, both senators, representative)
    across(
      c(recall_gov, recall_sen1, recall_sen2, recall_rep),
      ~ case_when(
        .x == 1 ~ "not heard of",
        .x == 2 ~ "R",
        .x == 3 ~ "D",
        .x == 4 ~ "other/I",
        .x == 5 ~ "not sure",
        TRUE ~ "."
      )
    ),
    # don't know (8) as NA, written to new *_na columns
    across(
      c(ideo_self, ideo_d, ideo_r),
      ~ replace(.x, which(.x == 8), NA_real_),
      .names = "{.col}_na"
    ),
    # don't know (8) as middle value (4); runs after the *_na step because it
    # overwrites the source columns
    across(
      c(ideo_self, ideo_d, ideo_r),
      ~ replace(.x, which(.x == 8), 4)
    )
  )

# Convert ideology likert scale to 0-1 scale
# Min-max rescaling on the observed (non-missing) range of each column,
# rounded to two decimals; NAs stay NA.
cces_2014.2 <- cces_2014.2 %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r, ideo_self_na, ideo_d_na, ideo_r_na),
    ~ {
      bounds <- range(.x, na.rm = TRUE)
      round((.x - bounds[1]) / (bounds[2] - bounds[1]), 2)
    }
  ))


# 2015 ----
# CC15_310a: 1 - R, 2 - D, 3 - neither, 4 - not sure, 8 - skipped, 9 - not asked 
# CC15_310b: 1 - R, 2 - D, 3 - neither, 4 - not sure, 8 - skipped, 9 - not asked 
# CC15_311a: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC15_311b: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC15_311c: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC15_311d: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC15_340a: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure, 98 - skipped, 99 - not asked
# CC15_340d: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure, 98 - skipped, 99 - not asked
# CC15_340e: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure, 98 - skipped, 99 - not asked
# CC15_312b: 1 - "strongly a", 2 - "somewhat a", 3 - "somewhat d", 4 - "strongly d", 5 - "not sure", 8 - "skipped", 0 - "not asked"

# Coerce the three ideology placement columns to plain numeric vectors
# (via character, so the stored codes themselves are what gets parsed).
cces_2015_sub <- cces_2015_sub %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r),
    ~ as.numeric(as.character(.x))
  ))

# Recode Values
# Replaces numeric survey codes with text labels and builds both ideology
# variants. Any code without an explicit label (8/9 on the knowledge and
# recall items, and NA) becomes ".".
cces_2015.2 <- cces_2015_sub %>%
  mutate(
    # NOTE(review): the year header lists 0 as the "not asked" code while the
    # match below uses 9 -- confirm against the codebook.
    cong_approval = case_when(
      cong_approval == 1 ~ "strongly a",
      cong_approval == 2 ~ "somewhat a",
      cong_approval == 3 ~ "somewhat d",
      cong_approval == 4 ~ "strongly d",
      cong_approval == 5 ~ "not sure",
      cong_approval == 8 ~ "skipped",
      cong_approval == 9 ~ "not asked",
      TRUE ~ "."
    ),
    # House / Senate majority-party knowledge items share one coding
    across(
      c(know_maj_house, know_maj_sen),
      ~ case_when(
        .x == 1 ~ "R",
        .x == 2 ~ "D",
        .x == 3 ~ "neither",
        .x == 4 ~ "not sure",
        TRUE ~ "."
      )
    ),
    # party-recall items (governor, both senators, representative)
    across(
      c(recall_gov, recall_sen1, recall_sen2, recall_rep),
      ~ case_when(
        .x == 1 ~ "not heard of",
        .x == 2 ~ "R",
        .x == 3 ~ "D",
        .x == 4 ~ "other/I",
        .x == 5 ~ "not sure",
        TRUE ~ "."
      )
    ),
    # don't know (8) and the 98/99 missing-data codes as NA, in new *_na columns
    across(
      c(ideo_self, ideo_d, ideo_r),
      ~ replace(.x, .x %in% c(8, 98, 99), NA_real_),
      .names = "{.col}_na"
    ),
    # don't know (8) as middle value (4). The 98/99 codes are set to NA here
    # too: previously they were left in place, so the min-max rescaling that
    # follows used 98 as the scale maximum and squashed the real 1-7 answers
    # towards 0. Runs after the *_na step because it overwrites the sources.
    across(
      c(ideo_self, ideo_d, ideo_r),
      ~ case_when(
        .x == 8 ~ 4,
        .x %in% c(98, 99) ~ NA_real_,
        TRUE ~ .x
      )
    )
  )


# Convert ideology likert scale to 0-1 scale
# Min-max rescaling on the observed (non-missing) range of each column,
# rounded to two decimals; NAs stay NA.
cces_2015.2 <- cces_2015.2 %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r, ideo_self_na, ideo_d_na, ideo_r_na),
    ~ {
      bounds <- range(.x, na.rm = TRUE)
      round((.x - bounds[1]) / (bounds[2] - bounds[1]), 2)
    }
  ))


# 2016 ---- 
# CC16_321a: 1 - R, 2 - D, 3 - neither, 4 - not sure, 8 - skipped, 9 - not asked 
# CC16_321b: 1 - R, 2 - D, 3 - neither, 4 - not sure, 8 - skipped, 9 - not asked 
# CC16_322a: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC16_322b: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC16_322c: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC16_322d: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC16_340a: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure, 98 - skipped, 99 - not asked 
# CC16_340g: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure, 98 - skipped, 99 - not asked
# CC16_340h: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure, 98 - skipped, 99 - not asked
# perceive_race: 1 - white, 2 - black, 3 - hispanic, 4 - asian, 5 - other, 6 - not sure, 8 - skipped, 9 - not asked  
# CC16_320b: 1 - "strongly a", 2 - "somewhat a", 3 - "somewhat d", 4 - "strongly d", 5 - "not sure", 8 - "skipped", 0 - "not asked"

# Coerce the three ideology placement columns to plain numeric vectors
# (via character, so the stored codes themselves are what gets parsed).
cces_2016_sub <- cces_2016_sub %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r),
    ~ as.numeric(as.character(.x))
  ))

# Recode Values
# Replaces numeric survey codes with text labels and builds both ideology
# variants. Any code without an explicit label (8/9 on the knowledge and
# recall items, and NA) becomes ".".
cces_2016.2 <- cces_2016_sub %>%
  mutate(
    # NOTE(review): the year header lists 0 as the "not asked" code while the
    # match below uses 9 -- confirm against the codebook.
    cong_approval = case_when(
      cong_approval == 1 ~ "strongly a",
      cong_approval == 2 ~ "somewhat a",
      cong_approval == 3 ~ "somewhat d",
      cong_approval == 4 ~ "strongly d",
      cong_approval == 5 ~ "not sure",
      cong_approval == 8 ~ "skipped",
      cong_approval == 9 ~ "not asked",
      TRUE ~ "."
    ),
    # House / Senate majority-party knowledge items share one coding
    across(
      c(know_maj_house, know_maj_sen),
      ~ case_when(
        .x == 1 ~ "R",
        .x == 2 ~ "D",
        .x == 3 ~ "neither",
        .x == 4 ~ "not sure",
        TRUE ~ "."
      )
    ),
    # party-recall items (governor, both senators, representative)
    across(
      c(recall_gov, recall_sen1, recall_sen2, recall_rep),
      ~ case_when(
        .x == 1 ~ "not heard of",
        .x == 2 ~ "R",
        .x == 3 ~ "D",
        .x == 4 ~ "other/I",
        .x == 5 ~ "not sure",
        TRUE ~ "."
      )
    ),
    # don't know (8) as NA, in new *_na columns. The ideology items are
    # documented with 98 = skipped and 99 = not asked; those are dropped as
    # well so they cannot end up defining the top of the rescaled range.
    across(
      c(ideo_self, ideo_d, ideo_r),
      ~ replace(.x, .x %in% c(8, 98, 99), NA_real_),
      .names = "{.col}_na"
    ),
    # don't know (8) as middle value (4); 98/99 are missing data, not answers,
    # so they become NA rather than being carried into the min-max rescaling.
    # Runs after the *_na step because it overwrites the source columns.
    across(
      c(ideo_self, ideo_d, ideo_r),
      ~ case_when(
        .x == 8 ~ 4,
        .x %in% c(98, 99) ~ NA_real_,
        TRUE ~ .x
      )
    )
  )

# Convert ideology likert scale to 0-1 scale
# Min-max rescaling on the observed (non-missing) range of each column,
# rounded to two decimals; NAs stay NA.
cces_2016.2 <- cces_2016.2 %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r, ideo_self_na, ideo_d_na, ideo_r_na),
    ~ {
      bounds <- range(.x, na.rm = TRUE)
      round((.x - bounds[1]) / (bounds[2] - bounds[1]), 2)
    }
  ))


# 2017 ----
# CC17_320b: 1 - R, 2 - D, 3 - neither, 4 - not sure, 8 - skipped, 9 - not asked 
# CC17_320c: 1 - R, 2 - D, 3 - neither, 4 - not sure, 8 - skipped, 9 - not asked 
# CC17_321a: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC17_321b: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC17_321c: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC17_321d: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC17_350a: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure, 98 - skipped, 99 - not asked
# CC17_350d: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure, 98 - skipped, 99 - not asked
# CC17_350e: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure, 98 - skipped, 99 - not asked
# CC17_322b: 1 - "strongly a", 2 - "somewhat a", 3 - "somewhat d", 4 - "strongly d", 5 - "not sure", 8 - "skipped", 0 - "not asked"

# Coerce the three ideology placement columns to plain numeric vectors
# (via character, so the stored codes themselves are what gets parsed).
cces_2017_sub <- cces_2017_sub %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r),
    ~ as.numeric(as.character(.x))
  ))

# Recode Values
# Replaces numeric survey codes with text labels and builds both ideology
# variants. Any code without an explicit label (8/9 on the knowledge and
# recall items, and NA) becomes ".".
cces_2017.2 <- cces_2017_sub %>%
  mutate(
    # NOTE(review): the year header lists 0 as the "not asked" code while the
    # match below uses 9 -- confirm against the codebook.
    cong_approval = case_when(
      cong_approval == 1 ~ "strongly a",
      cong_approval == 2 ~ "somewhat a",
      cong_approval == 3 ~ "somewhat d",
      cong_approval == 4 ~ "strongly d",
      cong_approval == 5 ~ "not sure",
      cong_approval == 8 ~ "skipped",
      cong_approval == 9 ~ "not asked",
      TRUE ~ "."
    ),
    # House / Senate majority-party knowledge items share one coding
    across(
      c(know_maj_house, know_maj_sen),
      ~ case_when(
        .x == 1 ~ "R",
        .x == 2 ~ "D",
        .x == 3 ~ "neither",
        .x == 4 ~ "not sure",
        TRUE ~ "."
      )
    ),
    # party-recall items (governor, both senators, representative)
    across(
      c(recall_gov, recall_sen1, recall_sen2, recall_rep),
      ~ case_when(
        .x == 1 ~ "not heard of",
        .x == 2 ~ "R",
        .x == 3 ~ "D",
        .x == 4 ~ "other/I",
        .x == 5 ~ "not sure",
        TRUE ~ "."
      )
    ),
    # don't know (8) and the 98/99 missing-data codes as NA, in new *_na columns
    across(
      c(ideo_self, ideo_d, ideo_r),
      ~ replace(.x, .x %in% c(8, 98, 99), NA_real_),
      .names = "{.col}_na"
    ),
    # don't know (8) as middle value (4). The 98/99 codes are set to NA here
    # too: previously they were left in place, so the min-max rescaling that
    # follows used 98 as the scale maximum and squashed the real 1-7 answers
    # towards 0. Runs after the *_na step because it overwrites the sources.
    across(
      c(ideo_self, ideo_d, ideo_r),
      ~ case_when(
        .x == 8 ~ 4,
        .x %in% c(98, 99) ~ NA_real_,
        TRUE ~ .x
      )
    )
  )

# Convert ideology likert scale to 0-1 scale
# Min-max rescaling on the observed (non-missing) range of each column,
# rounded to two decimals; NAs stay NA.
cces_2017.2 <- cces_2017.2 %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r, ideo_self_na, ideo_d_na, ideo_r_na),
    ~ {
      bounds <- range(.x, na.rm = TRUE)
      round((.x - bounds[1]) / (bounds[2] - bounds[1]), 2)
    }
  ))


# 2018 ----
# CC18_309a: 1 - R, 2 - D, 3 - neither, 4 - not sure
# CC18_309b: 1 - R, 2 - D, 3 - neither, 4 - not sure
# CC18_310a: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure
# CC18_310b: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure
# CC18_310c: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure
# CC18_310d: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure
# CC18_334A: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure
# CC18_334D: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure
# CC18_334E: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure
# perceive_race: 1 - white, 2 - black, 3 - hispanic, 4 - asian, 5 - other, 6 - not sure, 8 - skipped, 9 - not asked  
# CC18_308b: 1 - "strongly a", 2 - "somewhat a", 3 - "somewhat d", 4 - "strongly d", 5 - "not sure", 8 - "skipped", 0 - "not asked"

# Coerce the three ideology placement columns to plain numeric vectors
# (via character, so the stored codes themselves are what gets parsed).
cces_2018_sub <- cces_2018_sub %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r),
    ~ as.numeric(as.character(.x))
  ))

# Recode Values
# Replaces numeric survey codes with text labels and builds both ideology
# variants. Any code without an explicit label (including NA) becomes ".".
cces_2018.2 <- cces_2018_sub %>%
  mutate(
    # NOTE(review): the year header lists 0 as the "not asked" code while the
    # match below uses 9 -- confirm against the codebook.
    cong_approval = case_when(
      cong_approval == 1 ~ "strongly a",
      cong_approval == 2 ~ "somewhat a",
      cong_approval == 3 ~ "somewhat d",
      cong_approval == 4 ~ "strongly d",
      cong_approval == 5 ~ "not sure",
      cong_approval == 8 ~ "skipped",
      cong_approval == 9 ~ "not asked",
      TRUE ~ "."
    ),
    # House / Senate majority-party knowledge items share one coding
    across(
      c(know_maj_house, know_maj_sen),
      ~ case_when(
        .x == 1 ~ "R",
        .x == 2 ~ "D",
        .x == 3 ~ "neither",
        .x == 4 ~ "not sure",
        TRUE ~ "."
      )
    ),
    # party-recall items (governor, both senators, representative)
    across(
      c(recall_gov, recall_sen1, recall_sen2, recall_rep),
      ~ case_when(
        .x == 1 ~ "not heard of",
        .x == 2 ~ "R",
        .x == 3 ~ "D",
        .x == 4 ~ "other/I",
        .x == 5 ~ "not sure",
        TRUE ~ "."
      )
    ),
    # don't know (8) as NA, written to new *_na columns
    across(
      c(ideo_self, ideo_d, ideo_r),
      ~ replace(.x, which(.x == 8), NA_real_),
      .names = "{.col}_na"
    ),
    # don't know (8) as middle value (4); runs after the *_na step because it
    # overwrites the source columns
    across(
      c(ideo_self, ideo_d, ideo_r),
      ~ replace(.x, which(.x == 8), 4)
    )
  )

# Convert ideology likert scale to 0-1 scale
# Min-max rescaling on the observed (non-missing) range of each column,
# rounded to two decimals; NAs stay NA.
cces_2018.2 <- cces_2018.2 %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r, ideo_self_na, ideo_d_na, ideo_r_na),
    ~ {
      bounds <- range(.x, na.rm = TRUE)
      round((.x - bounds[1]) / (bounds[2] - bounds[1]), 2)
    }
  ))

# 2019 ----
# CC19_309a: 1 - R, 2 - D, 3 - neither, 4 - not sure, 8 - skipped, 9 - not asked 
# CC19_309b: 1 - R, 2 - D, 3 - neither, 4 - not sure, 8 - skipped, 9 - not asked 
# CC19_310a: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC19_310b: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC19_310c: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC19_310d: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure, 8 - skipped, 9 - not asked
# CC19_334a: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure, 8 - skipped, 9 - not asked 
# CC19_334d: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure, 8 - skipped, 9 - not asked 
# CC19_334e:1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con,
# 8 - not sure, 8 - skipped, 9 - not asked 
# CC19_308b: 1 - "strongly a", 2 - "somewhat a", 3 - "somewhat d", 4 - "strongly d", 5 - "not sure", 8 - "skipped", 0 - "not asked"

# Coerce the three ideology placement columns to plain numeric vectors
# (via character, so the stored codes themselves are what gets parsed).
cces_2019_sub <- cces_2019_sub %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r),
    ~ as.numeric(as.character(.x))
  ))

# Recode Values
# Replaces numeric survey codes with text labels and builds both ideology
# variants. Any code without an explicit label (8/9 on the knowledge and
# recall items, and NA) becomes ".".
cces_2019.2 <- cces_2019_sub %>%
  mutate(
    # NOTE(review): the year header lists 0 as the "not asked" code while the
    # match below uses 9 -- confirm against the codebook.
    cong_approval = case_when(
      cong_approval == 1 ~ "strongly a",
      cong_approval == 2 ~ "somewhat a",
      cong_approval == 3 ~ "somewhat d",
      cong_approval == 4 ~ "strongly d",
      cong_approval == 5 ~ "not sure",
      cong_approval == 8 ~ "skipped",
      cong_approval == 9 ~ "not asked",
      TRUE ~ "."
    ),
    # House / Senate majority-party knowledge items share one coding
    across(
      c(know_maj_house, know_maj_sen),
      ~ case_when(
        .x == 1 ~ "R",
        .x == 2 ~ "D",
        .x == 3 ~ "neither",
        .x == 4 ~ "not sure",
        TRUE ~ "."
      )
    ),
    # party-recall items (governor, both senators, representative)
    across(
      c(recall_gov, recall_sen1, recall_sen2, recall_rep),
      ~ case_when(
        .x == 1 ~ "not heard of",
        .x == 2 ~ "R",
        .x == 3 ~ "D",
        .x == 4 ~ "other/I",
        .x == 5 ~ "not sure",
        TRUE ~ "."
      )
    ),
    # don't know (8) as NA, written to new *_na columns
    # NOTE(review): if this year's ideology items also carry 98/99
    # skipped / not-asked codes they are not removed here -- confirm.
    across(
      c(ideo_self, ideo_d, ideo_r),
      ~ replace(.x, which(.x == 8), NA_real_),
      .names = "{.col}_na"
    ),
    # don't know (8) as middle value (4); runs after the *_na step because it
    # overwrites the source columns
    across(
      c(ideo_self, ideo_d, ideo_r),
      ~ replace(.x, which(.x == 8), 4)
    )
  )

# Convert ideology likert scale to 0-1 scale 
# Min-max rescale every 2019 ideology placement (the "don't know as middle
# value" columns and their "_na" counterparts). Each column is rescaled with
# its own observed minimum and maximum (NAs ignored) and rounded to 2 decimals.
cces_2019.2 <- cces_2019.2 %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r, ideo_self_na, ideo_d_na, ideo_r_na),
    function(placement) {
      lowest <- min(placement, na.rm = TRUE)
      highest <- max(placement, na.rm = TRUE)
      round((placement - lowest) / (highest - lowest), 2)
    }
  ))



# 2020 ----
# CC20_310a: 1 - R, 2 - D, 3 - neither, 4 - not sure
# CC20_310b: 1 - R, 2 - D, 3 - neither, 4 - not sure
# CC20_311a: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure
# CC20_311b: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure
# CC20_311c: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure
# CC20_311d: 1 - not heard of, 2 - R, 3 - D, 4 - other/I, 5 - not sure
# CC20_340a: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con, 8 - not sure
# CC20_340e: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con, 8 - not sure
# CC20_340f: 1 - very lib, 2 - lib, 3 - somewhat lib, 4 - middle, 5 - somewhat con, 6 - con, 7 - very con, 8 - not sure
# perceive_race: 1 - white, 2 - black, 3 - hispanic, 4 - asian, 5 - other, 6 - not sure, 8 - skipped, 9 - not asked  
# CC20_320b: 1 - "strongly a", 2 - "somewhat a", 3 - "somewhat d", 4 - "strongly d", 5 - "not sure", 8 - "skipped", 0 - "not asked"

# Make sure variables are numeric 
# Going through as.character() first means a factor column is converted by its
# printed value rather than by its internal level index.
cces_2020_sub <- cces_2020_sub %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r),
    function(placement) as.numeric(as.character(placement))
  ))

# Recode Values 
# Translate the 2020 numeric survey codes into text labels. Any code that is
# not listed for a variable (including NA) becomes ".".
cces_2020.2 <- mutate(
  cces_2020_sub,
  cong_approval = case_when(
    cong_approval == 1 ~ "strongly a",
    cong_approval == 2 ~ "somewhat a",
    cong_approval == 3 ~ "somewhat d",
    cong_approval == 4 ~ "strongly d",
    cong_approval == 5 ~ "not sure",
    cong_approval == 8 ~ "skipped",
    cong_approval == 9 ~ "not asked",
    TRUE ~ "."
  ),
  # Which party holds the majority in each chamber
  know_maj_house = case_when(
    know_maj_house == 1 ~ "R",
    know_maj_house == 2 ~ "D",
    know_maj_house == 3 ~ "neither",
    know_maj_house == 4 ~ "not sure",
    TRUE ~ "."
  ),
  know_maj_sen = case_when(
    know_maj_sen == 1 ~ "R",
    know_maj_sen == 2 ~ "D",
    know_maj_sen == 3 ~ "neither",
    know_maj_sen == 4 ~ "not sure",
    TRUE ~ "."
  ),
  # Party recall for governor, both senators and the House member
  recall_gov = case_when(
    recall_gov == 1 ~ "not heard of",
    recall_gov == 2 ~ "R",
    recall_gov == 3 ~ "D",
    recall_gov == 4 ~ "other/I",
    recall_gov == 5 ~ "not sure",
    TRUE ~ "."
  ),
  recall_sen1 = case_when(
    recall_sen1 == 1 ~ "not heard of",
    recall_sen1 == 2 ~ "R",
    recall_sen1 == 3 ~ "D",
    recall_sen1 == 4 ~ "other/I",
    recall_sen1 == 5 ~ "not sure",
    TRUE ~ "."
  ),
  recall_sen2 = case_when(
    recall_sen2 == 1 ~ "not heard of",
    recall_sen2 == 2 ~ "R",
    recall_sen2 == 3 ~ "D",
    recall_sen2 == 4 ~ "other/I",
    recall_sen2 == 5 ~ "not sure",
    TRUE ~ "."
  ),
  recall_rep = case_when(
    recall_rep == 1 ~ "not heard of",
    recall_rep == 2 ~ "R",
    recall_rep == 3 ~ "D",
    recall_rep == 4 ~ "other/I",
    recall_rep == 5 ~ "not sure",
    TRUE ~ "."
  ),
  # "not sure" (8) as NA; must be computed before the columns are overwritten below
  ideo_self_na = na_if(ideo_self, 8),
  ideo_d_na = na_if(ideo_d, 8),
  ideo_r_na = na_if(ideo_r, 8),
  # "not sure" (8) as the scale midpoint (4)
  ideo_self = replace(ideo_self, which(ideo_self == 8), 4),
  ideo_d = replace(ideo_d, which(ideo_d == 8), 4),
  ideo_r = replace(ideo_r, which(ideo_r == 8), 4)
)

# Convert ideology likert scale to 0-1 scale 
# Min-max rescale every 2020 ideology placement (the "don't know as middle
# value" columns and their "_na" counterparts). Each column is rescaled with
# its own observed minimum and maximum (NAs ignored) and rounded to 2 decimals.
cces_2020.2 <- cces_2020.2 %>%
  mutate(across(
    c(ideo_self, ideo_d, ideo_r, ideo_self_na, ideo_d_na, ideo_r_na),
    function(placement) {
      lowest <- min(placement, na.rm = TRUE)
      highest <- max(placement, na.rm = TRUE)
      round((placement - lowest) / (highest - lowest), 2)
    }
  ))

# MERGE VARIABLES FROM 2006-2020 ----

# Stack the recoded yearly files (2006 through 2020) on top of each other.
add_cces_var <- do.call(
  rbind,
  list(
    cces_2006.2, cces_2007.2, cces_2008.2, cces_2009.2, cces_2010.2,
    cces_2011.2, cces_2012.2, cces_2013.2, cces_2014.2, cces_2015.2,
    cces_2016.2, cces_2017.2, cces_2018.2, cces_2019.2, cces_2020.2
  )
)


## CONVERT VALUES TO NUMBERS ----

# For analysis end up needing numeric values so do at once with all cces years 

# Take the text labels we just merged and convert them to numbers.
# Every variable gets two versions (to later test whether the coding matters):
#   <var>_na : "not sure" / "not heard of" style answers are NA
#   <var>    : those answers are folded into the middle / "other" value
# mutate() evaluates its arguments in order, so each `_na` column is built
# BEFORE the plain column of the same variable is overwritten with numbers.
# Any label not listed for a variable (including ".") becomes NA.
add_cces_var.2 <- add_cces_var %>%
  mutate(
    # NOTE(review): the `_na` version below runs 1-4 while the plain version
    # runs 1-5 with 3 as the midpoint, so the two are not on the same scale --
    # confirm this is intended before comparing them.
    cong_approval_na = case_when(cong_approval == "strongly d" ~ 1,
                                 cong_approval == "somewhat d" ~ 2,
                                 cong_approval == "somewhat a" ~ 3,
                                 cong_approval == "strongly a" ~ 4,
                                 cong_approval == "not sure" ~ NA_real_,
                                 cong_approval == "skipped" ~ NA_real_,
                                 cong_approval == "not asked" ~ NA_real_,
                                 cong_approval == "neither a or d" ~ NA_real_,
                                 cong_approval == "never heard" ~ NA_real_,
                                 cong_approval == "." ~ NA_real_,
                                 TRUE ~ NA_real_),
    cong_approval = case_when(cong_approval == "strongly d" ~ 1,
                              cong_approval == "somewhat d" ~ 2,
                              cong_approval == "somewhat a" ~ 4,
                              cong_approval == "strongly a" ~ 5,
                              # every non-substantive answer as the middle value
                              cong_approval == "not sure" ~ 3,
                              cong_approval == "skipped" ~ 3,
                              cong_approval == "not asked" ~ 3,
                              cong_approval == "neither a or d" ~ 3,
                              cong_approval == "never heard" ~ 3,
                              cong_approval == "." ~ NA_real_,
                              TRUE ~ NA_real_),
    # Party-of-majority knowledge: 0 = R, 1 = D, 2 = neither / tie
    know_maj_house_na = case_when(know_maj_house == "R" ~ 0,
                                  know_maj_house == "D" ~ 1,
                                  know_maj_house == "neither" ~ 2,
                                  know_maj_house == "not sure" ~ NA_real_,
                                  TRUE ~ NA_real_),
    know_maj_house = case_when(know_maj_house == "R" ~ 0,
                               know_maj_house == "D" ~ 1,
                               know_maj_house == "neither" ~ 2,
                               know_maj_house == "not sure" ~ 2,
                               TRUE ~ NA_real_),
    # The Senate "not sure" test must look at know_maj_sen. It previously
    # tested know_maj_house, which is already numeric at this point, so Senate
    # "not sure" answers silently fell through to NA in the plain version.
    know_maj_sen_na = case_when(know_maj_sen == "R" ~ 0,
                                know_maj_sen == "D" ~ 1,
                                know_maj_sen == "neither" ~ 2,
                                know_maj_sen == "50D-50R" ~ 2,
                                know_maj_sen == "not sure" ~ NA_real_,
                                TRUE ~ NA_real_),
    know_maj_sen = case_when(know_maj_sen == "R" ~ 0,
                             know_maj_sen == "D" ~ 1,
                             know_maj_sen == "neither" ~ 2,
                             know_maj_sen == "50D-50R" ~ 2,
                             know_maj_sen == "not sure" ~ 2,
                             TRUE ~ NA_real_),
    # Party recall: 0 = R, 1 = D, 2 = other/I (plus not heard / not sure in
    # the plain version)
    recall_gov_na = case_when(recall_gov == "not heard of" ~ NA_real_,
                              recall_gov == "R" ~ 0,
                              recall_gov == "D" ~ 1,
                              recall_gov == "other/I" ~ 2,
                              recall_gov == "not sure" ~ NA_real_,
                              TRUE ~ NA_real_),
    recall_gov = case_when(recall_gov == "not heard of" ~ 2,
                           recall_gov == "R" ~ 0,
                           recall_gov == "D" ~ 1,
                           recall_gov == "other/I" ~ 2,
                           recall_gov == "not sure" ~ 2,
                           TRUE ~ NA_real_),
    recall_sen1_na = case_when(recall_sen1 == "not heard of" ~ NA_real_,
                               recall_sen1 == "R" ~ 0,
                               recall_sen1 == "D" ~ 1,
                               recall_sen1 == "other/I" ~ 2,
                               recall_sen1 == "not sure" ~ NA_real_,
                               TRUE ~ NA_real_),
    recall_sen1 = case_when(recall_sen1 == "not heard of" ~ 2,
                            recall_sen1 == "R" ~ 0,
                            recall_sen1 == "D" ~ 1,
                            recall_sen1 == "other/I" ~ 2,
                            recall_sen1 == "not sure" ~ 2,
                            TRUE ~ NA_real_),
    recall_sen2_na = case_when(recall_sen2 == "not heard of" ~ NA_real_,
                               recall_sen2 == "R" ~ 0,
                               recall_sen2 == "D" ~ 1,
                               recall_sen2 == "other/I" ~ 2,
                               recall_sen2 == "not sure" ~ NA_real_,
                               TRUE ~ NA_real_),
    recall_sen2 = case_when(recall_sen2 == "not heard of" ~ 2,
                            recall_sen2 == "R" ~ 0,
                            recall_sen2 == "D" ~ 1,
                            recall_sen2 == "other/I" ~ 2,
                            recall_sen2 == "not sure" ~ 2,
                            TRUE ~ NA_real_),
    recall_rep_na = case_when(recall_rep == "not heard of" ~ NA_real_,
                              recall_rep == "R" ~ 0,
                              recall_rep == "D" ~ 1,
                              recall_rep == "other/I" ~ 2,
                              recall_rep == "not sure" ~ NA_real_,
                              TRUE ~ NA_real_),
    recall_rep = case_when(recall_rep == "not heard of" ~ 2,
                           recall_rep == "R" ~ 0,
                           recall_rep == "D" ~ 1,
                           recall_rep == "other/I" ~ 2,
                           recall_rep == "not sure" ~ 2,
                           TRUE ~ NA_real_)
  )

# MERGE THESE VARIABLES TO CUMULATIVE CCES FILE ----

# Read the cumulative CCES file (Stata format)
cces_cumu <- read_dta(file = "./CCES Files/cumulative_2006-2020.dta")

# Shorter alias for the additional-variable dataset built above
cces_add <- add_cces_var.2

# Attach the additional variables to the cumulative file, matching on
# respondent id and survey year. case_id is converted with as.numeric() first.
cces_join <- left_join(
  mutate(cces_cumu, case_id = as.numeric(case_id)),
  cces_add,
  by = c("case_id", "year")
)

# Fix cong column: the cumulative file stores congresses as codes 1-8, which
# are mapped here to the congress numbers 109-116. Anything else becomes NA.
cces_join2 <- mutate(
  cces_join,
  cong = case_when(
    cong == 1 ~ 109,
    cong == 2 ~ 110,
    cong == 3 ~ 111,
    cong == 4 ~ 112,
    cong == 5 ~ 113,
    cong == 6 ~ 114,
    cong == 7 ~ 115,
    cong == 8 ~ 116,
    TRUE ~ NA_real_
  )
)

## DOWNLOAD DATA ON MEMBERS OF CONGRESS (HAND-COMBINED CARNES, DAILYKOS, AND FILLED IN BY HAND) ----
df_mc <- read.csv("./Full_MCdata_110-116.csv")

# CLEAN UP CCES TO MERGE TO MC DATA ----

# Rename joined dataset from above for clarity
cces_full <- cces_join2

# Subset to variables of interest
cces_sub <- cces_full %>% 
  select(year, case_id, weight, weight_cumulative, state, st, cong, dist, dist_post, zipcode, 
         county_fips, pid3, pid3_leaner, pid7, ideo5,
         gender, birthyr, age, educ, race, religion, marstat, faminc, employ,
         newsint, approval_pres, approval_rep, approval_sen1, approval_sen2, approval_gov,
         intent_pres_08, intent_pres_12, intent_pres_16, intent_pres_20, voted_pres_08, voted_pres_12, 
         voted_pres_16, voted_pres_20, intent_pres_party, voted_pres_party, vv_regstatus, vv_party_gen, 
         vv_turnout_gvm, intent_rep, intent_rep_party, voted_rep, voted_rep_party, 
         intent_gov, intent_gov_party, voted_gov, voted_gov_party, intent_sen, 
         intent_sen_party, voted_sen, voted_sen_party, rep_current, rep_icpsr, intent_rep_chosen,
         voted_rep_chosen, sen1_current, sen2_current, sen1_icpsr, sen2_icpsr,
         intent_sen_chosen, voted_sen_chosen, know_maj_house, know_maj_sen, recall_gov,
         recall_sen1, recall_sen2, recall_rep, ideo_self, ideo_d, ideo_r, 
         ideo_self_na, ideo_d_na, ideo_r_na, perceive_race) %>%
  # no need for 109th congress - those elected in 2004 and served 05-06 (I'm starting at those elected in 06, serving 07-09)
  # (rows with a missing cong are dropped here as well, because NA != 109 is NA)
  filter(cong != 109) %>%
  # state holds numeric codes; convert to a plain number before comparing
  rename(state_num = state) %>%
  mutate(state_num = as.numeric(as.character(state_num))) %>%
  # not including DC, American Samoa, Guam, Northern Mariana Islands, Puerto Rico, Outlying Islands, Virgin Islands
  # The earlier `state != 11 | 60 | 66 | ...` was always TRUE (a bare non-zero
  # number is coerced to TRUE), so nothing was dropped. Rows with a missing
  # state are kept, as before.
  filter(!state_num %in% c(11, 60, 66, 69, 72, 74, 78))

# The cumulative file identifies states by number, so build a lookup from
# state number to state name. state.name is the built-in vector of the 50
# state names; the numbers below are listed in the same order.
states <- as.matrix(state.name)
state_num <- as.matrix(c(1, 2, 4:6, 8:10, 12, 13, 15:42, 44:51, 53:56))
state_df <- data.frame(
  state = as.vector(states),
  state_num = as.numeric(state_num)
)

# Attach the state name to each CCES row, then drop the numeric code
cces_sub.1 <- select(
  left_join(cces_sub, state_df, by = "state_num"),
  -state_num
)

# Recode values from the subset variables ("dont know" as NA vs. middle value) 
# mutate() evaluates its arguments in order: once a column such as pid7 has
# been overwritten, later expressions see the NEW values. Every `_na` column is
# therefore built before the plain column of the same variable is overwritten.
# Codes not listed for a variable (including NA) become NA.
cces_sub.2 <- cces_sub.1 %>%
  mutate(ideo5_na = case_when(ideo5==1 ~ 1, # very lib
                              ideo5==2 ~ 2, # lib
                              ideo5==3 ~ 3, # moderate
                              ideo5==4 ~ 4, # con
                              ideo5==5 ~ 5, # very con
                              ideo5==6 ~ NA_real_, # not sure
                              TRUE ~ NA_real_),
         ideo5 = case_when(ideo5==1 ~ 1,
                           ideo5==2 ~ 2,
                           ideo5==3 ~ 3,
                           ideo5==4 ~ 4,
                           ideo5==5 ~ 5,
                           ideo5==6 ~ 3, # not sure as middle value 
                           TRUE ~ NA_real_),
         party3_na = case_when(pid3==1 ~ 1, # democrat
                               pid3==2 ~ 3, # republican
                               pid3==3 ~ 2, # independent
                               pid3==4 ~ 2, # other 
                               pid3==5 ~ NA_real_, #not sure
                               TRUE ~ NA_real_),
         party3 = case_when(pid3==1 ~ 1, 
                            pid3==2 ~ 3, 
                            pid3==3 ~ 2,
                            pid3==4 ~ 2, 
                            pid3==5 ~ 2, # not sure as middle value 
                            TRUE ~ NA_real_),
         pid3_leaner_na = case_when(pid3_leaner==1 ~ 1, # dem
                                    pid3_leaner==2 ~ 3, # rep
                                    pid3_leaner==3 ~ 2, # ind
                                    pid3_leaner==8 ~ NA_real_, # not sure
                                    TRUE ~ NA_real_),
         pid3_leaner = case_when(pid3_leaner==1 ~ 1,
                                 pid3_leaner==2 ~ 3,
                                 pid3_leaner==3 ~ 2,
                                 pid3_leaner==8 ~ 2, # not sure as middle value 
                                 TRUE ~ NA_real_),
         pid7_na = case_when(pid7==1 ~ 1, # strong dem
                             pid7==2 ~ 2, # not strong dem
                             pid7==3 ~ 3, # lean dem 
                             pid7==4 ~ 4, # ind
                             pid7==5 ~ 5, # lean rep 
                             pid7==6 ~ 6, # not strong rep 
                             pid7==7 ~ 7, # strong rep 
                             pid7==8 ~ NA_real_, # not sure 
                             TRUE ~ NA_real_),
         pid7 = case_when(pid7==1 ~ 1,
                          pid7==2 ~ 2,
                          pid7==3 ~ 3,
                          pid7==4 ~ 4,
                          pid7==5 ~ 5,
                          pid7==6 ~ 6,
                          pid7==7 ~ 7,
                          pid7==8 ~ 4, # not sure as middle value 
                          TRUE ~ NA_real_),
         # generate main party variable, where leaners are coded as partisans
         # party7_na is built from pid7_na, NOT pid7: pid7 was overwritten just
         # above (8 -> 4), so testing pid7==8 here could never match and "not
         # sure" respondents were coded as independents instead of NA.
         party7_na = case_when(pid7_na==1 ~ 1, # strong dem
                               pid7_na==2 ~ 1, # not strong dem
                               pid7_na==3 ~ 1, # lean dem 
                               pid7_na==4 ~ 2, # ind
                               pid7_na==5 ~ 3, # lean rep 
                               pid7_na==6 ~ 3, # not strong rep 
                               pid7_na==7 ~ 3, # strong rep 
                               TRUE ~ NA_real_), # not sure (NA in pid7_na)
         # pid7 already has "not sure" folded into 4, so 4 -> 2 covers both
         # independents and not sure (not sure as middle value)
         party7 = case_when(pid7==1 ~ 1,
                            pid7==2 ~ 1,
                            pid7==3 ~ 1,
                            pid7==4 ~ 2,
                            pid7==5 ~ 3,
                            pid7==6 ~ 3,
                            pid7==7 ~ 3,
                            TRUE ~ NA_real_),
         approval_rep_na = case_when(approval_rep==1 ~ 5, # Strong approve
                                     approval_rep==2 ~ 4, # somewhat approve
                                     approval_rep==3 ~ 2, # somewhat disapprove 
                                     approval_rep==4 ~ 1, # strongly disapprove
                                     approval_rep==8 ~ 3, # neither approve nor disapprove 
                                     approval_rep==5 ~ NA_real_, # never heard / not sure 
                                     approval_rep==6 ~ NA_real_, # never heard of person 
                                     approval_rep==7 ~ NA_real_, # never heard of this person
                                     TRUE ~ NA_real_),
         # Impute never heard / not sure with the median (or mean, below) of the
         # RECODED approval scale (approval_rep_na). The raw codes run in the
         # opposite direction and include the not-sure codes themselves, so
         # their median/mean is not a value on the recoded 1-5 scale.
         approval_rep_med = case_when(approval_rep==1 ~ 5, # Strong approve
                                      approval_rep==2 ~ 4, # somewhat approve
                                      approval_rep==3 ~ 2, # somewhat disapprove 
                                      approval_rep==4 ~ 1, # strongly disapprove
                                      approval_rep==8 ~ 3, # neither approve nor disapprove 
                                      approval_rep==5 ~ median(approval_rep_na, na.rm = TRUE), # never heard / not sure 
                                      approval_rep==6 ~ median(approval_rep_na, na.rm = TRUE), # never heard of person 
                                      approval_rep==7 ~ median(approval_rep_na, na.rm = TRUE), # never heard of this person
                                      TRUE ~ NA_real_),
         approval_rep_mean = case_when(approval_rep==1 ~ 5, # Strong approve
                                       approval_rep==2 ~ 4, # somewhat approve
                                       approval_rep==3 ~ 2, # somewhat disapprove 
                                       approval_rep==4 ~ 1, # strongly disapprove
                                       approval_rep==8 ~ 3, # neither approve nor disapprove 
                                       approval_rep==5 ~ mean(approval_rep_na, na.rm = TRUE), # never heard / not sure 
                                       approval_rep==6 ~ mean(approval_rep_na, na.rm = TRUE), # never heard of person 
                                       approval_rep==7 ~ mean(approval_rep_na, na.rm = TRUE), # never heard of this person
                                       TRUE ~ NA_real_),
         approval_rep = case_when(approval_rep==1 ~ 5,
                                  approval_rep==2 ~ 4, 
                                  approval_rep==3 ~ 2, 
                                  approval_rep==4 ~ 1, 
                                  approval_rep==8 ~ 3, 
                                  # all versions of never heard/not sure coded as middle value
                                  approval_rep==5 ~ 3,
                                  approval_rep==6 ~ 3,
                                  approval_rep==7 ~ 3,
                                  TRUE ~ NA_real_),
         approval_sen1_na = case_when(approval_sen1==1 ~ 5,
                                      approval_sen1==2 ~ 4, 
                                      approval_sen1==3 ~ 2, 
                                      approval_sen1==4 ~ 1, 
                                      approval_sen1==5 ~ NA_real_, 
                                      approval_sen1==6 ~ NA_real_,
                                      approval_sen1==7 ~ NA_real_,
                                      approval_sen1==8 ~ 3,
                                      TRUE ~ NA_real_),
         approval_sen1 = case_when(approval_sen1==1 ~ 5,
                                   approval_sen1==2 ~ 4, 
                                   approval_sen1==3 ~ 2, 
                                   approval_sen1==4 ~ 1, 
                                   # all versions of never heard/not sure coded as middle value
                                   approval_sen1==5 ~ 3, 
                                   approval_sen1==6 ~ 3,
                                   approval_sen1==7 ~ 3,
                                   approval_sen1==8 ~ 3,
                                   TRUE ~ NA_real_),
         # Same coding as approval_sen1_na. Code 4 (strongly disapprove) maps to
         # 1; this branch previously tested ==5, which sent code 4 to NA and
         # coded "not sure" (5) as strongly disapprove.
         approval_sen2_na = case_when(approval_sen2==1 ~ 5,
                                      approval_sen2==2 ~ 4, 
                                      approval_sen2==3 ~ 2, 
                                      approval_sen2==4 ~ 1, 
                                      approval_sen2==5 ~ NA_real_, 
                                      approval_sen2==6 ~ NA_real_,
                                      approval_sen2==7 ~ NA_real_,
                                      approval_sen2==8 ~ 3,
                                      TRUE ~ NA_real_),
         approval_sen2 = case_when(approval_sen2==1 ~ 5,
                                   approval_sen2==2 ~ 4, 
                                   approval_sen2==3 ~ 2, 
                                   approval_sen2==4 ~ 1, 
                                   # all versions of never heard/not sure coded as middle value
                                   approval_sen2==5 ~ 3, 
                                   approval_sen2==6 ~ 3,
                                   approval_sen2==7 ~ 3,
                                   approval_sen2==8 ~ 3,
                                   TRUE ~ NA_real_),
         # keep income codes 1-12; codes 13, 14 and anything else become NA
         faminc = case_when(faminc== 1 ~ 1, 
                            faminc== 2 ~ 2, 
                            faminc== 3 ~ 3, 
                            faminc== 4 ~ 4, 
                            faminc== 5 ~ 5, 
                            faminc== 6 ~ 6, 
                            faminc== 7 ~ 7, 
                            faminc== 8 ~ 8, 
                            faminc== 9 ~ 9, 
                            faminc== 10 ~ 10, 
                            faminc== 11 ~ 11, 
                            faminc== 12 ~ 12,
                            faminc== 13 ~ NA_real_, 
                            faminc== 14 ~ NA_real_)) %>%
  mutate(religion = as.numeric(as.character(religion))) %>%
  # validated turnout as 0/1: code 1 -> 1, code 2 -> 0, code 3 and others -> NA
  mutate(vv_turnout_gvm = case_when(vv_turnout_gvm==2 ~ 0, # validated turnout
                                    vv_turnout_gvm==3 ~ NA_real_,
                                    vv_turnout_gvm==1 ~ 1,
                                    TRUE ~ NA_real_))

## CLEANING MC file to match CCES ----

# Adding Nominate Scores ----

# Read one DW-NOMINATE member file per congress (110th through 116th)
nominate110 <- read.csv(file = "./DW-NOMINATE/H110_members.csv")
nominate111 <- read.csv(file = "./DW-NOMINATE/H111_members.csv")
nominate112 <- read.csv(file = "./DW-NOMINATE/H112_members.csv")
nominate113 <- read.csv(file = "./DW-NOMINATE/H113_members.csv")
nominate114 <- read.csv(file = "./DW-NOMINATE/H114_members.csv")
nominate115 <- read.csv(file = "./DW-NOMINATE/H115_members.csv")
nominate116 <- read.csv(file = "./DW-NOMINATE/H116_members.csv")

# Stack the seven files and drop the rows whose chamber is "President"
nominate_combine <- filter(
  do.call(
    rbind,
    list(nominate110, nominate111, nominate112, nominate113,
         nominate114, nominate115, nominate116)
  ),
  chamber != "President"
)

# Keep the columns needed for the merge, renaming them to the names used in
# the MC data, then recode party: 0 = Republican, 1 = Democrat.
nominate_combine.2 <- nominate_combine %>%
  select(cong = congress,
         dist = district_code,
         st_abbr = state_abbrev,
         mc_party = party_code,
         rep_icpsr = icpsr,
         bioguide_id, bioname, nominate_dim1, nominate_dim2) %>%
  mutate(mc_party = case_when(
    mc_party %in% c(200, 328) ~ 0, # 328: only person is Amash... so R
    mc_party == 100 ~ 1
  ))

# Add nominate to MC dataframe 
## The MC data identifies states by full name; the nominate data uses the
## two-letter abbreviation, so attach the abbreviation first.
state_abbr <- data.frame(st_abbr = state.abb, state = state.name)
df_mc2 <- left_join(df_mc, state_abbr, by = "state")

## Join nominate scores to main MC data on congress, district, state and
## party, then drop bioname (only needed to check that the matches are correct)
df_mc3 <- select(
  left_join(df_mc2, nominate_combine.2,
            by = c("cong", "dist", "st_abbr", "mc_party")),
  -bioname
)

# Add Presidents each year ----

# One presidential-party value per congress (110th-116th), paired by position
pres_party <- c(0, 1, 1, 1, 1, 0, 0)
cong <- seq(110, 116, by = 1)
pres <- data.frame(pres_party = pres_party, cong = cong)
# Attach the presidential party to every MC row of the matching congress
df_mc4 <- left_join(df_mc3, pres, by = "cong")

# Change years in order to make 2 full dataframes ----
# Each MC row gets two year keys: `year` (the even year following mc_year) and
# `year2` (mc_year itself, the odd year). Other mc_year values get year = NA.
df_mc4 <- rename(
  mutate(
    df_mc4,
    year = case_when(
      mc_year == 2007 ~ 2008,
      mc_year == 2009 ~ 2010,
      mc_year == 2011 ~ 2012,
      mc_year == 2013 ~ 2014,
      mc_year == 2015 ~ 2016,
      mc_year == 2017 ~ 2018,
      mc_year == 2019 ~ 2020
    )
  ),
  year2 = mc_year # year two is the odd years
)

## COMBINE CCES & MC DATA ----
### Need to create 2 data sets 

# First, merge MC to CCES based on even years 
# "year" is evens 
# Keep the even survey years 2008-2020 ...
cces_evens <- filter(cces_sub.2, year %in% seq(2008, 2020, by = 2))

# ... and attach the MC serving that respondent's district in that congress
cces_mc_evens <- left_join(
  mutate(cces_evens, state = as.character(state)),
  df_mc4,
  by = c("cong", "year", "state", "dist")
)

# Next, merge MC to CCES based on odd years 
# "year2" is odds
# Keep the odd survey years 2007-2019 ...
cces_odds <- filter(cces_sub.2, year %in% seq(2007, 2019, by = 2))

# ... rename year to year2 so it matches the MC file's odd-year key, then
# attach the MC serving that respondent's district in that congress
cces_mc_odds <- left_join(
  rename(mutate(cces_odds, state = as.character(state)), year2 = year),
  df_mc4,
  by = c("cong", "year2", "state", "dist")
)

## ONE-PERIOD LAGS & LEADS ----

## Odds 
# Rename file for clarity 
# df_odds is the odd-year CCES file with MC data attached (cces_mc_odds);
# the lag/lead work below builds on this object.
df_odds <- cces_mc_odds

# Evens 
# Rename file for clarity 
# df_evens is the even-year CCES file with MC data attached (cces_mc_evens).
df_evens <- cces_mc_evens

# Lag and Lead functions (1 time period)
# Both take a vector ordered in time and return a vector of the same length.
#   lag(x)[i]  is x[i - 1], the previous period (t-1); the first element is NA
#   lead(x)[i] is x[i + 1], the next period (t+1); the last element is NA
# The two definitions were previously swapped, so `lag` returned next-period
# values and `lead` previous-period values. They also returned too many
# elements for a length-1 input because 1:0 and 2:1 count downwards.
# Indexing with seq_along() keeps the length (and type) for any input,
# including length 0 and 1.
# These names deliberately mask dplyr::lag() / dplyr::lead() for use with ave().
lag <- function(x) c(NA, x)[seq_along(x)]
lead <- function(x) c(x, NA)[seq_along(x) + 1L]

# Need state_dist variable, non_white respondent variable, and race variables for respondents 
# Adds to a merged CCES/MC file:
#   state_dist : upper-cased `st`, a dash, then `dist` (NA when state is
#                missing or empty)
#   nonwhite, black, hispanic, asian : 0/1 respondent flags from `race`
#                (a missing race gives 0 on every flag)
add_district_and_race_flags <- function(df) {
  mutate(
    df,
    state_dist = if_else(
      !is.na(state) & state != "",
      paste0(str_to_upper(toupper(st)), "-", str_to_title(dist)),
      NA_character_
    ),
    nonwhite = as.numeric(!is.na(race) & race != 1),
    black = as.numeric(!is.na(race) & race == 2),
    hispanic = as.numeric(!is.na(race) & race == 3),
    asian = as.numeric(!is.na(race) & race == 4)
  )
}

df_odds <- add_district_and_race_flags(df_odds)

df_evens <- add_district_and_race_flags(df_evens)


## ODDS FIRST
# Separate lookup table for the one-period shifts: one row per distinct
# combination of congress, district and MC traits, sorted by district and then
# congress. distinct() replaces the earlier group_by() + unique(): the grouping
# was never used and left the table grouped.
#
# ave() applies the shift within each state_dist; rows whose state_dist is NA
# are not part of any group and keep their original value.
#
# NOTE(review): with rows sorted by ascending cong, the file's lag() returns
# the following row's value and lead() the preceding row's -- confirm that the
# lag_/lead_ labels below match the intended direction.
# NOTE(review): a district can have more than one row per congress here (any
# difference in the MC trait columns creates a new row), in which case a
# one-row shift is not a one-congress shift -- verify against the data.
treat_lag_odds <- df_odds %>%
  select(cong, state_dist, mc_gender, nonwhite_mc, black_mc, hispanic_mc, asian_mc) %>%
  distinct() %>%
  arrange(state_dist, cong) %>%
  mutate(
    ## MC lags (t-1)
    lag_gender_mc = ave(mc_gender, state_dist, FUN = lag),
    lag_nonwhite_mc = ave(nonwhite_mc, state_dist, FUN = lag),
    lag_black_mc = ave(black_mc, state_dist, FUN = lag),
    lag_hispanic_mc = ave(hispanic_mc, state_dist, FUN = lag),
    lag_asian_mc = ave(asian_mc, state_dist, FUN = lag),
    ## MC leads (t+1)
    lead_gender_mc = ave(mc_gender, state_dist, FUN = lead),
    lead_nonwhite_mc = ave(nonwhite_mc, state_dist, FUN = lead),
    lead_black_mc = ave(black_mc, state_dist, FUN = lead),
    lead_hispanic_mc = ave(hispanic_mc, state_dist, FUN = lead),
    lead_asian_mc = ave(asian_mc, state_dist, FUN = lead)
  )

# adding to dataframe (keys are all the columns the lookup table was built on)
df_odds2 <- df_odds %>%
  left_join(treat_lag_odds, by = c("cong", "state_dist", "mc_gender", "nonwhite_mc", 
                                   "black_mc", "hispanic_mc", "asian_mc"))

## EVENS
# Separate lookup table for the one-period shifts: one row per distinct
# combination of congress, district and MC traits, sorted by district and then
# congress. distinct() replaces the earlier group_by() + unique(): the grouping
# was never used and left the table grouped.
#
# ave() applies the shift within each state_dist; rows whose state_dist is NA
# are not part of any group and keep their original value.
#
# NOTE(review): with rows sorted by ascending cong, the file's lag() returns
# the following row's value and lead() the preceding row's -- confirm that the
# lag_/lead_ labels below match the intended direction.
treat_lag_evens <- df_evens %>%
  select(cong, state_dist, mc_gender, nonwhite_mc, black_mc, hispanic_mc, asian_mc) %>%
  distinct() %>%
  arrange(state_dist, cong) %>%
  mutate(
    ## MC lags (t-1)
    lag_gender_mc = ave(mc_gender, state_dist, FUN = lag),
    lag_nonwhite_mc = ave(nonwhite_mc, state_dist, FUN = lag),
    lag_black_mc = ave(black_mc, state_dist, FUN = lag),
    lag_hispanic_mc = ave(hispanic_mc, state_dist, FUN = lag),
    lag_asian_mc = ave(asian_mc, state_dist, FUN = lag),
    ## MC leads (t+1)
    lead_gender_mc = ave(mc_gender, state_dist, FUN = lead),
    lead_nonwhite_mc = ave(nonwhite_mc, state_dist, FUN = lead),
    lead_black_mc = ave(black_mc, state_dist, FUN = lead),
    lead_hispanic_mc = ave(hispanic_mc, state_dist, FUN = lead),
    lead_asian_mc = ave(asian_mc, state_dist, FUN = lead)
  )

# adding to dataframe (keys are all the columns the lookup table was built on)
df_evens2 <- df_evens %>%
  left_join(treat_lag_evens, by = c("cong", "state_dist", "mc_gender", "nonwhite_mc", 
                                    "black_mc", "hispanic_mc", "asian_mc"))

## TWO-PERIOD LAGS & LEADS ----

# Carry the one-period results forward under the working names
df_odds <- df_odds2
df_evens <- df_evens2

# Functions for 2-period
#
# Both return a vector of the same length as `x`, shifted by two positions and
# padded with NA:
#   lead2(x)[i] is x[i - 2]  (value two rows earlier; first two elements NA)
#   lag2(x)[i]  is x[i + 2]  (value two rows later; last two elements NA)
#
# Padding first and then indexing keeps the output length equal to the input
# length for short inputs. The earlier `x[1:(length(x)-2)]` errored on a
# length-1 vector, and `x[3:length(x)]` ran backwards on a length-2 vector and
# returned x[2] where NA was expected.
#
# NOTE(review): as with the one-period helpers, the directions are the reverse
# of dplyr's lag/lead naming; the original directions are preserved here.
lead2 <- function(x) c(NA, NA, x)[seq_len(length(x))]
lag2 <- function(x) c(x, NA, NA)[seq_len(length(x)) + 2L]

# set up file for odds
# One row per distinct combination of congress, district and MC traits, sorted
# by district and then congress. distinct() replaces the earlier group_by() +
# unique(): the grouping was never used and left the table grouped.
# ave() applies the two-row shift within each state_dist.
# NOTE(review): lag2() returns the value two rows later and lead2() the value
# two rows earlier in this ordering -- confirm the labels match the intent.
treat_lag_odds <- df_odds %>%
  select(cong, state_dist, mc_gender, nonwhite_mc, black_mc, hispanic_mc, asian_mc) %>%
  distinct() %>%
  arrange(state_dist, cong) %>%
  mutate(
    ## MC lags (t-2)
    lag2_gender_mc = ave(mc_gender, state_dist, FUN = lag2),
    lag2_nonwhite_mc = ave(nonwhite_mc, state_dist, FUN = lag2),
    lag2_black_mc = ave(black_mc, state_dist, FUN = lag2),
    lag2_hispanic_mc = ave(hispanic_mc, state_dist, FUN = lag2),
    lag2_asian_mc = ave(asian_mc, state_dist, FUN = lag2),
    ## MC leads (t+2)
    lead2_gender_mc = ave(mc_gender, state_dist, FUN = lead2),
    lead2_nonwhite_mc = ave(nonwhite_mc, state_dist, FUN = lead2),
    lead2_black_mc = ave(black_mc, state_dist, FUN = lead2),
    lead2_hispanic_mc = ave(hispanic_mc, state_dist, FUN = lead2),
    lead2_asian_mc = ave(asian_mc, state_dist, FUN = lead2)
  )

# adding to dataframe (keys are all the columns the lookup table was built on)
df_odds2 <- df_odds %>%
  left_join(treat_lag_odds, by = c("cong", "state_dist", "mc_gender", "nonwhite_mc", 
                                   "black_mc", "hispanic_mc", "asian_mc"))

# set up file for evens
# One row per distinct combination of congress, district and MC traits, sorted
# by district and then congress. distinct() replaces the earlier group_by() +
# unique(): the grouping was never used and left the table grouped.
# ave() applies the two-row shift within each state_dist.
# NOTE(review): lag2() returns the value two rows later and lead2() the value
# two rows earlier in this ordering -- confirm the labels match the intent.
treat_lag_evens <- df_evens %>%
  select(cong, state_dist, mc_gender, nonwhite_mc, black_mc, hispanic_mc, asian_mc) %>%
  distinct() %>%
  arrange(state_dist, cong) %>%
  mutate(
    ## MC lags (t-2)
    lag2_gender_mc = ave(mc_gender, state_dist, FUN = lag2),
    lag2_nonwhite_mc = ave(nonwhite_mc, state_dist, FUN = lag2),
    lag2_black_mc = ave(black_mc, state_dist, FUN = lag2),
    lag2_hispanic_mc = ave(hispanic_mc, state_dist, FUN = lag2),
    lag2_asian_mc = ave(asian_mc, state_dist, FUN = lag2),
    ## MC leads (t+2)
    lead2_gender_mc = ave(mc_gender, state_dist, FUN = lead2),
    lead2_nonwhite_mc = ave(nonwhite_mc, state_dist, FUN = lead2),
    lead2_black_mc = ave(black_mc, state_dist, FUN = lead2),
    lead2_hispanic_mc = ave(hispanic_mc, state_dist, FUN = lead2),
    lead2_asian_mc = ave(asian_mc, state_dist, FUN = lead2)
  )

# adding to dataframe (keys are all the columns the lookup table was built on)
df_evens2 <- df_evens %>%
  left_join(treat_lag_evens, by = c("cong", "state_dist", "mc_gender", "nonwhite_mc", 
                                    "black_mc", "hispanic_mc", "asian_mc"))


## CLEANING UP RESPONDENT GENDER ----

# Respondent gender arrives coded 1/2; move it to 0/1 (1 -> 0, 2 -> 1).
# Any other value, including missing, becomes NA.
recode_gender_01 <- function(df) {
  mutate(
    df,
    gender = case_when(
      gender == 1 ~ 0,
      gender == 2 ~ 1,
      TRUE ~ NA_real_
    )
  )
}

df_odd2 <- recode_gender_01(df_odds2)
df_even2 <- recode_gender_01(df_evens2)


## NEED TO RESCALE APPROVAL TO 0-1 ---- 

# Maps the three representative-approval columns onto [0, 1]. rescale() is
# called without `from`, so each column is scaled by its own range within the
# data frame it is applied to.
rescale_approval_01 <- function(df) {
  mutate(
    df,
    approval_rep = rescale(approval_rep, to = c(0, 1)),
    approval_rep_na = rescale(approval_rep_na, to = c(0, 1)),
    approval_rep_mean = rescale(approval_rep_mean, to = c(0, 1))
  )
}

df_odd3 <- rescale_approval_01(df_odd2)
df_even3 <- rescale_approval_01(df_even2)

## NEED TO RECODE MC RACE TO BE OPPOSITE WHITE ####

# Builds three indicators that contrast one non-white MC group with everyone
# who is not flagged as another non-white group:
#   black_mc_nw    : 1 = Black MC,    NA = Hispanic or Asian MC, 0 = otherwise
#   hispanic_mc_nw : 1 = Hispanic MC, NA = Black or Asian MC,    0 = otherwise
#   asian_mc_nw    : 1 = Asian MC,    NA = Hispanic or Black MC, 0 = otherwise
# NOTE(review): rows where the MC race flags are all missing fall through to 0.
add_mc_race_vs_white <- function(df) {
  mutate(
    df,
    black_mc_nw = case_when(
      black_mc == 1 ~ 1,
      hispanic_mc == 1 | asian_mc == 1 ~ NA_real_,
      TRUE ~ 0
    ),
    hispanic_mc_nw = case_when(
      hispanic_mc == 1 ~ 1,
      black_mc == 1 | asian_mc == 1 ~ NA_real_,
      TRUE ~ 0
    ),
    asian_mc_nw = case_when(
      asian_mc == 1 ~ 1,
      hispanic_mc == 1 | black_mc == 1 ~ NA_real_,
      TRUE ~ 0
    )
  )
}

df_even4 <- add_mc_race_vs_white(df_even3)
df_odd4 <- add_mc_race_vs_white(df_odd3)

## CREATING KNOWLEDGE VARIABLES WITH CCES KNOWLEDGE DATA ####
# read the file listing the majority party for each congress
maj_df <- read.csv("./maj_congress.csv")

# keep only the congress number and its majority party
maj_df_sub <- select(maj_df, cong, maj_party)

# attach the majority party to both main dataframes by congress
df_even5 <- left_join(df_even4, maj_df_sub, by = "cong")
df_odd5 <- left_join(df_odd4, maj_df_sub, by = "cong")

# Knowledge measures built from the respondent's answers and the actual values:
#   knowledge1    : majority party in the House. 1 when know_maj_house (0/1)
#                   equals maj_party, 0 when it differs or when
#                   know_maj_house is 2, NA otherwise
#   knowledge2    : recall of the representative's party. 1 when mc_party
#                   equals recall_rep, 0 when it differs, NA when either is
#                   missing
#   knowledge_avg : mean of the two (NA if either is NA)
add_knowledge_vars <- function(df) {
  mutate(
    df,
    knowledge1 = case_when(
      (know_maj_house == 1 & maj_party == 1) |
        (know_maj_house == 0 & maj_party == 0) ~ 1,
      (know_maj_house == 1 & maj_party == 0) |
        (know_maj_house == 0 & maj_party == 1) ~ 0,
      know_maj_house == 2 ~ 0,
      TRUE ~ NA_real_
    ),
    knowledge2 = as.numeric(mc_party == recall_rep),
    knowledge_avg = (knowledge1 + knowledge2) / 2
  )
}

df_even6 <- add_knowledge_vars(df_even5)
df_odd6 <- add_knowledge_vars(df_odd5)

## NEED TO ADD CONGRUENCE VARIABLES and CONGRUENCE LAGS/LEADS ####
## EVENS 
# Binary respondent/MC match indicators for gender and race.
# The as.numeric(...) indicators are NA when the comparison is undetermined
# because of a missing value; the case_when(...) indicators are 0 in that case.
df_even7 <- df_even6 %>%
  mutate(
    # gender
    female_congruence = as.numeric(mc_gender == 1 & gender == 1),
    male_congruence = as.numeric(mc_gender == 0 & gender == 0),
    gender_congruence = case_when(
      (mc_gender == 1 & gender == 1) | (mc_gender == 0 & gender == 0) ~ 1,
      TRUE ~ 0
    ),
    voter_female = as.numeric(gender == 1),
    voter_male = as.numeric(gender == 0),
    mc_female = as.numeric(mc_gender == 1),
    mc_male = as.numeric(mc_gender == 0),
    # race
    nonwhite_congruence = as.numeric(nonwhite_mc == 1 & nonwhite == 1),
    white_congruence = as.numeric(nonwhite_mc == 0 & nonwhite == 0),
    black_congruence = as.numeric(black_mc_nw == 1 & black == 1),
    asian_congruence = as.numeric(asian_mc_nw == 1 & asian == 1),
    hispanic_congruence = as.numeric(hispanic_mc_nw == 1 & hispanic == 1),
    # non-white respondent represented by an MC of the same group
    nonwhite_and_racecong = case_when(
      (nonwhite == 1 & black_mc_nw == 1 & black == 1) |
        (nonwhite == 1 & hispanic_mc_nw == 1 & hispanic == 1) |
        (nonwhite == 1 & asian_mc_nw == 1 & asian == 1) ~ 1,
      TRUE ~ 0
    ),
    # any same-group match, including white respondent with white MC
    race_congruence = case_when(
      (black_mc_nw == 1 & black == 1) |
        (hispanic_mc_nw == 1 & hispanic == 1) |
        (asian_mc_nw == 1 & asian == 1) |
        (nonwhite_mc == 0 & nonwhite == 0) ~ 1,
      TRUE ~ 0
    )
  )

# Separate lookup table for shifted congruence values: one row per distinct
# combination of congress, district and the three congruence indicators,
# sorted by district and then congress. distinct() replaces the earlier
# group_by() + unique(): the grouping was never used and left the table grouped.
#
# NOTE(review): the congruence indicators are built from respondent gender and
# race, so one cong x state_dist cell can contribute several rows here. The
# row-wise shifts below then move across those rows rather than strictly
# across congresses -- confirm this is the intended construction.
# NOTE(review): in this ordering lag()/lag2() return values from later rows
# and lead()/lead2() from earlier rows -- confirm the labels match the intent.
treat_lag_evens <- df_even7 %>%
  select(cong, state_dist, nonwhite_and_racecong, race_congruence, gender_congruence) %>%
  distinct() %>%
  arrange(state_dist, cong) %>%
  mutate(
    ## MC lags (t-1)
    lag_nonwhite_and_racecong = ave(nonwhite_and_racecong, state_dist, FUN = lag),
    lag_race_congruent = ave(race_congruence, state_dist, FUN = lag),
    lag_gen_congruent = ave(gender_congruence, state_dist, FUN = lag),
    ## MC leads (t+1)
    lead_nonwhite_and_racecong = ave(nonwhite_and_racecong, state_dist, FUN = lead),
    lead_race_congruent = ave(race_congruence, state_dist, FUN = lead),
    lead_gen_congruent = ave(gender_congruence, state_dist, FUN = lead),
    ## MC lags (t-2)
    lag2_nonwhite_and_racecong = ave(nonwhite_and_racecong, state_dist, FUN = lag2),
    lag2_race_congruent = ave(race_congruence, state_dist, FUN = lag2),
    lag2_gen_congruent = ave(gender_congruence, state_dist, FUN = lag2),
    ## MC leads (t+2)
    lead2_nonwhite_and_racecong = ave(nonwhite_and_racecong, state_dist, FUN = lead2),
    lead2_race_congruent = ave(race_congruence, state_dist, FUN = lead2),
    lead2_gen_congruent = ave(gender_congruence, state_dist, FUN = lead2)
  )

# adding to dataframe 
df_even8 <- df_even7 %>%
  left_join(treat_lag_evens, by = c("cong", "state_dist", "nonwhite_and_racecong", "race_congruence", "gender_congruence"))


# ODDS 
# Binary respondent/MC match indicators for gender and race.
# The as.numeric(...) indicators are NA when the comparison is undetermined
# because of a missing value; the case_when(...) indicators are 0 in that case.
df_odd7 <- df_odd6 %>%
  mutate(
    # gender
    female_congruence = as.numeric(mc_gender == 1 & gender == 1),
    gender_congruence = case_when(
      (mc_gender == 1 & gender == 1) | (mc_gender == 0 & gender == 0) ~ 1,
      TRUE ~ 0
    ),
    male_congruence = as.numeric(mc_gender == 0 & gender == 0),
    voter_female = as.numeric(gender == 1),
    voter_male = as.numeric(gender == 0),
    mc_female = as.numeric(mc_gender == 1),
    mc_male = as.numeric(mc_gender == 0),
    # race
    nonwhite_congruence = as.numeric(nonwhite_mc == 1 & nonwhite == 1),
    white_congruence = as.numeric(nonwhite_mc == 0 & nonwhite == 0),
    black_congruence = as.numeric(black_mc_nw == 1 & black == 1),
    asian_congruence = as.numeric(asian_mc_nw == 1 & asian == 1),
    hispanic_congruence = as.numeric(hispanic_mc_nw == 1 & hispanic == 1),
    # non-white respondent represented by an MC of the same group
    nonwhite_and_racecong = case_when(
      (nonwhite == 1 & black_mc_nw == 1 & black == 1) |
        (nonwhite == 1 & hispanic_mc_nw == 1 & hispanic == 1) |
        (nonwhite == 1 & asian_mc_nw == 1 & asian == 1) ~ 1,
      TRUE ~ 0
    ),
    # any same-group match, including white respondent with white MC
    race_congruence = case_when(
      (black_mc_nw == 1 & black == 1) |
        (hispanic_mc_nw == 1 & hispanic == 1) |
        (asian_mc_nw == 1 & asian == 1) |
        (nonwhite_mc == 0 & nonwhite == 0) ~ 1,
      TRUE ~ 0
    )
  )

# Separate lookup table for shifted congruence values: one row per distinct
# combination of congress, district and the three congruence indicators,
# sorted by district and then congress. distinct() replaces the earlier
# group_by() + unique(): the grouping was never used and left the table grouped.
#
# NOTE(review): the congruence indicators are built from respondent gender and
# race, so one cong x state_dist cell can contribute several rows here. The
# row-wise shifts below then move across those rows rather than strictly
# across congresses -- confirm this is the intended construction.
# NOTE(review): in this ordering lag()/lag2() return values from later rows
# and lead()/lead2() from earlier rows -- confirm the labels match the intent.
treat_lag_odds <- df_odd7 %>%
  select(cong, state_dist, nonwhite_and_racecong, race_congruence, gender_congruence) %>%
  distinct() %>%
  arrange(state_dist, cong) %>%
  mutate(
    ## MC lags (t-1)
    lag_nonwhite_and_racecong = ave(nonwhite_and_racecong, state_dist, FUN = lag),
    lag_race_congruent = ave(race_congruence, state_dist, FUN = lag),
    lag_gen_congruent = ave(gender_congruence, state_dist, FUN = lag),
    ## MC leads (t+1)
    lead_nonwhite_and_racecong = ave(nonwhite_and_racecong, state_dist, FUN = lead),
    lead_race_congruent = ave(race_congruence, state_dist, FUN = lead),
    lead_gen_congruent = ave(gender_congruence, state_dist, FUN = lead),
    ## MC lags (t-2)
    lag2_nonwhite_and_racecong = ave(nonwhite_and_racecong, state_dist, FUN = lag2),
    lag2_race_congruent = ave(race_congruence, state_dist, FUN = lag2),
    lag2_gen_congruent = ave(gender_congruence, state_dist, FUN = lag2),
    ## MC leads (t+2)
    lead2_nonwhite_and_racecong = ave(nonwhite_and_racecong, state_dist, FUN = lead2),
    lead2_race_congruent = ave(race_congruence, state_dist, FUN = lead2),
    lead2_gen_congruent = ave(gender_congruence, state_dist, FUN = lead2)
  )

# adding to dataframe 
df_odd8 <- df_odd7 %>%
  left_join(treat_lag_odds, by = c("cong", "state_dist", "nonwhite_and_racecong", "race_congruence", "gender_congruence"))

#### IDEOLOGY CODING ####
# Shorter working names for the finished congruence files
df_even <- df_even8
df_odd <- df_odd8

### IDEOLOGY CONGRUENCE MEASURE 1: DW-NOMINATE & CCES SELF-IDEO ----

# Distance between the MC's DW-NOMINATE first dimension and the respondent's
# self-placement.
#   - Self-placement is moved onto a -1..1 range to match DW-NOMINATE
#     (DW-NOMINATE itself is left alone: no MC sits at exactly -1 or 1, which
#     makes rescaling it to 0-1 awkward).
#   - incongruence_ideo1*   : absolute distance, rounded to 3 decimals
#   - incongruence_ideo1.2* : signed distance (MC minus respondent);
#     positive = MC more conservative, negative = respondent more conservative
#   - the *_na variants use ideo_self_na instead of ideo_self
add_ideo_incongruence1 <- function(df) {
  mutate(
    df,
    ideo_self_new = rescale(ideo_self, to = c(-1, 1)),
    ideo_self_new_na = rescale(ideo_self_na, to = c(-1, 1)),
    incongruence_ideo1 = round(abs(nominate_dim1 - ideo_self_new), 3),
    incongruence_ideo1_na = round(abs(nominate_dim1 - ideo_self_new_na), 3),
    incongruence_ideo1.2 = round(nominate_dim1 - ideo_self_new, 3),
    incongruence_ideo1.2_na = round(nominate_dim1 - ideo_self_new_na, 3)
  )
}

df_even2 <- add_ideo_incongruence1(df_even)
df_odd2 <- add_ideo_incongruence1(df_odd)

### IDEOLOGY CONGRUENCE MEASURE 2: MC IDEO PLACEMENT & CCES SELF-IDEO PLACEMENT ----

# This requires going back to each of the CCES datasets to get the respondent ideo placement for MC or incumbent
# Then we can join these values onto the main dataset by year and case_id 

## RECODE MC IDEO TO BE CONSISTENT ACROSS YEARS ----
# 2006 - Leave out ----
# v3045 Ideology Placement-House Candidate 1 & v3045 Ideology Placement-House Candidate 2
# problem here because don't include member ideology if they're not a candidate (retiring) 

## LEAVING OUT 2006! 
# 2007 ----
# CC31 - Where would you place your member on 0 to 100
#
# Output columns: year, case_id, ideo_mc, ideo_mc_na (both on 0-1).
#   ideo_mc    : "Not sure" counted as the midpoint (50), "Skipped" as NA
#   ideo_mc_na : "Not sure" and "Skipped" both NA
#
# CC31 is converted to text first so the label handling works the same whether
# the column is a factor, character or numeric: ifelse() on a factor returns
# level codes rather than the recorded values. Both output columns are derived
# from that one text copy, so neither recode overwrites the other, and both
# are converted to numeric before dividing by 100.
# NOTE(review): any other non-numeric label in CC31 becomes NA with a coercion
# warning -- check the warnings when this runs.
cces_2007_sub <- cces_2007 %>%
  mutate(year = 2007) %>%
  select(year, case_id = caseid, ideo_mc = CC31) %>%
  mutate(
    ideo_raw = trimws(as.character(ideo_mc)),
    ideo_mc_na = as.numeric(
      ifelse(ideo_raw %in% c("Not sure", "Skipped"), NA_character_, ideo_raw)
    ),
    ideo_mc = as.numeric(
      case_when(ideo_raw == "Not sure" ~ "50",
                ideo_raw == "Skipped" ~ NA_character_,
                TRUE ~ ideo_raw)
    )
  ) %>%
  # Convert the 0-100 to 0-1
  mutate(ideo_mc = ideo_mc / 100,
         ideo_mc_na = ideo_mc_na / 100) %>%
  select(year, case_id, ideo_mc, ideo_mc_na)

# 2008  ----
# CC317k house candidate 1 (D), CC317l house candidate 2 (R), CC317m house member if retiring
# V526 - House member incumbent - (2 retiring, 1 incumbent, NAs)
# Dem is cand1 and rep is cand2
# V535 - House member's party
#
# Output columns: year, case_id, ideo_mc, ideo_mc_na (both on 0-1).
#   ideo_mc    : 997 "Not sure" counted as the midpoint (50); 998 "skipped"
#                and 999 "Not asked" set to NA
#   ideo_mc_na : 997, 998 and 999 all NA
# 998/999 were previously left in ideo_mc and would have been divided by 100
# like real placements; they are now NA there too, as the comments intended.
cces_2008_sub <- cces_2008 %>%
  mutate(year = 2008) %>%
  select(year, V100, CC317k, CC317l, CC317m, V526, V535) %>%
  # Pick the placement that refers to the sitting member
  mutate(ideo_mc = case_when(V526==2 ~ CC317m,
                             V535=="Democratic" ~ CC317k,
                             V535=="Republican" ~ CC317l,
                             TRUE ~ NA_real_)) %>%
  mutate(ideo_mc = as.numeric(as.character(ideo_mc))) %>%
  # ideo_mc_na is computed first, from the still-unrecoded ideo_mc
  mutate(ideo_mc_na = if_else(ideo_mc %in% c(997, 998, 999), NA_real_, ideo_mc),
         ideo_mc = case_when(ideo_mc == 997 ~ 50,
                             ideo_mc %in% c(998, 999) ~ NA_real_,
                             TRUE ~ ideo_mc)) %>%
  # Convert the 0-100 to 0-1
  mutate(ideo_mc = ideo_mc/100,
         ideo_mc_na = ideo_mc_na/100) %>%
  rename(case_id = V100) %>%
  select(year, case_id, ideo_mc, ideo_mc_na)

# 2009  ----
# CC09_42F - ideology representative 
# Output columns: year, case_id, ideo_mc, ideo_mc_na.
# There appear to be no "don't know" values, so ideo_mc_na starts as a plain
# copy of ideo_mc. Both are then min-max scaled to 0-1 using the observed
# minimum and maximum, and rounded to 2 decimals.
cces_2009_sub <- cces_2009 %>%
  mutate(year = 2009) %>%
  select(year, case_id = v100, ideo_mc = cc09_42f) %>%
  mutate(ideo_mc_na = ideo_mc) %>%
  # Convert the 7pt to 0-1
  mutate(
    ideo_mc = round(
      (ideo_mc - min(ideo_mc, na.rm = TRUE)) /
        diff(range(ideo_mc, na.rm = TRUE)), 2),
    ideo_mc_na = round(
      (ideo_mc_na - min(ideo_mc_na, na.rm = TRUE)) /
        diff(range(ideo_mc_na, na.rm = TRUE)), 2)
  )

# 2010 ----
# CC334L - $CurrentHouseName, CC334J - $HouseCandDemName, CC334K - $HouseCandRepName
# V504 - House incumbent's retirement status, 1-retiring, 0-not, -9 not asked, -8 skipped
# V535 - Democratic House candidate incumbent
# V538 - Republican House candidate incumbent
# Output columns: year, case_id, ideo_mc, ideo_mc_na.
#   ideo_mc    : 8 ("skipped"/"not sure") counted as the midpoint (4)
#   ideo_mc_na : 8 set to NA
# Both are then min-max scaled to 0-1 using the observed minimum and maximum,
# and rounded to 2 decimals.
cces_2010_sub <- cces_2010 %>%
  mutate(
    year = 2010,
    # Pick the placement that refers to the sitting member
    ideo_mc = case_when(V535 == 1 ~ CC334J,
                        V538 == 1 ~ CC334K,
                        V504 == 1 ~ CC334L,
                        TRUE ~ NA_real_),
    ideo_mc_na = as.numeric(ifelse(ideo_mc == 8, NA, ideo_mc)),
    ideo_mc = as.numeric(ifelse(ideo_mc == 8, 4, ideo_mc))
  ) %>%
  select(year, case_id = V100, ideo_mc, ideo_mc_na) %>%
  # Convert the 7pt to 0-1
  mutate(
    ideo_mc = round(
      (ideo_mc - min(ideo_mc, na.rm = TRUE)) /
        diff(range(ideo_mc, na.rm = TRUE)), 2),
    ideo_mc_na = round(
      (ideo_mc_na - min(ideo_mc_na, na.rm = TRUE)) /
        diff(range(ideo_mc_na, na.rm = TRUE)), 2)
  )



# 2011 ---- 
# CC342H - Ideology grid - house rep
# Output columns: year, case_id, ideo_mc, ideo_mc_na.
#   ideo_mc    : 8 ("not sure") counted as the midpoint (4)
#   ideo_mc_na : 8 set to NA
# Both are then min-max scaled to 0-1 using the observed minimum and maximum,
# and rounded to 2 decimals.
cces_2011_sub <- cces_2011 %>%
  mutate(year = 2011) %>%
  select(year, case_id = V100, ideo_mc = CC342H) %>%
  mutate(ideo_mc = as.numeric(as.character(ideo_mc))) %>%
  mutate(ideo_mc_na = ifelse(ideo_mc == 8, NA, ideo_mc),
         ideo_mc = ifelse(ideo_mc == 8, 4, ideo_mc)) %>%
  # Convert the 7pt to 0-1
  mutate(
    ideo_mc = round(
      (ideo_mc - min(ideo_mc, na.rm = TRUE)) /
        diff(range(ideo_mc, na.rm = TRUE)), 2),
    ideo_mc_na = round(
      (ideo_mc_na - min(ideo_mc_na, na.rm = TRUE)) /
        diff(range(ideo_mc_na, na.rm = TRUE)), 2)
  )

# 2012 ---- 
# CC334N - Ideology - House member, CC334L - candidate 1, CC334M - candidate 2 
# CurrentHouseRetiring_post - current house retiring (1 = retiring)
# HouseCand1Incumbent 
# HouseCand2Incumbent
# Output columns: year, case_id, ideo_mc, ideo_mc_na.
#   ideo_mc    : 8 ("not sure") counted as the midpoint (4)
#   ideo_mc_na : 8 set to NA
# Both are then min-max scaled to 0-1 using the observed minimum and maximum,
# and rounded to 2 decimals.
cces_2012_sub <- cces_2012 %>%
  mutate(
    year = 2012,
    # Pick the placement that refers to the sitting member
    ideo_mc = case_when(HouseCand1Incumbent == 1 ~ CC334L,
                        HouseCand2Incumbent == 1 ~ CC334M,
                        CurrentHouseRetiring_post == 1 ~ CC334N,
                        TRUE ~ NA_real_),
    ideo_mc_na = ifelse(ideo_mc == 8, NA, ideo_mc),
    ideo_mc = ifelse(ideo_mc == 8, 4, ideo_mc)
  ) %>%
  select(year, case_id = V101, ideo_mc, ideo_mc_na) %>%
  # Convert the 7pt to 0-1
  mutate(
    ideo_mc = round(
      (ideo_mc - min(ideo_mc, na.rm = TRUE)) /
        diff(range(ideo_mc, na.rm = TRUE)), 2),
    ideo_mc_na = round(
      (ideo_mc_na - min(ideo_mc_na, na.rm = TRUE)) /
        diff(range(ideo_mc_na, na.rm = TRUE)), 2)
  )



# 2013 ---- 
# CC334N - How would you rate each of the following individuals and groups? 7pt scale - $CurrentHouseName
# Output columns: year, case_id, ideo_mc, ideo_mc_na.
#   ideo_mc    : 8 ("not sure") counted as the midpoint (4)
#   ideo_mc_na : 8 set to NA
# Both are then min-max scaled to 0-1 using the observed minimum and maximum,
# and rounded to 2 decimals.
cces_2013_sub <- cces_2013 %>%
  mutate(year = 2013) %>%
  select(year, case_id = caseid, ideo_mc = CC334N) %>%
  mutate(ideo_mc = as.numeric(as.character(ideo_mc))) %>%
  mutate(ideo_mc_na = ifelse(ideo_mc == 8, NA, ideo_mc),
         ideo_mc = ifelse(ideo_mc == 8, 4, ideo_mc)) %>%
  # Convert the 7pt to 0-1
  mutate(
    ideo_mc = round(
      (ideo_mc - min(ideo_mc, na.rm = TRUE)) /
        diff(range(ideo_mc, na.rm = TRUE)), 2),
    ideo_mc_na = round(
      (ideo_mc_na - min(ideo_mc_na, na.rm = TRUE)) /
        diff(range(ideo_mc_na, na.rm = TRUE)), 2)
  )

# 2014 ---- 
# CC334T House candidate 1, CC334U House candidate 2, CC334V House member if not running 
# HouseCand1IncumbentNum - house can 1 incumbent
# HouseCand2IncumbentNum - house can 2 incumbent
# Output columns: year, case_id, ideo_mc, ideo_mc_na.
#   ideo_mc    : 8 ("not sure") counted as the midpoint (4)
#   ideo_mc_na : 8 set to NA
# Both are then min-max scaled to 0-1 using the observed minimum and maximum,
# and rounded to 2 decimals.
cces_2014_sub <- cces_2014 %>%
  mutate(
    year = 2014,
    # Pick the placement that refers to the sitting member; rows matching none
    # of the three conditions are left NA
    ideo_mc = case_when(
      HouseCand1IncumbentNum == 1 ~ CC334T,
      HouseCand2IncumbentNum == 1 ~ CC334U,
      HouseCand1IncumbentNum != 1 & HouseCand2IncumbentNum != 1 ~ CC334V
    ),
    ideo_mc = as.numeric(as.character(ideo_mc)),
    ideo_mc_na = ifelse(ideo_mc == 8, NA, ideo_mc),
    ideo_mc = ifelse(ideo_mc == 8, 4, ideo_mc)
  ) %>%
  select(year, case_id = V101, ideo_mc, ideo_mc_na) %>%
  # Convert the 7pt to 0-1
  mutate(
    ideo_mc = round(
      (ideo_mc - min(ideo_mc, na.rm = TRUE)) /
        diff(range(ideo_mc, na.rm = TRUE)), 2),
    ideo_mc_na = round(
      (ideo_mc_na - min(ideo_mc_na, na.rm = TRUE)) /
        diff(range(ideo_mc_na, na.rm = TRUE)), 2)
  )



# 2015 ---- 
# CC15_340h how would you rate each of the following individuals and groups? 7pt scale - $CurrentHouseName
# Output columns: year, ideo_mc, ideo_mc_na, case_id order as year, case_id,
# ideo_mc, ideo_mc_na.
#   ideo_mc    : 8 ("not sure") counted as the midpoint (4); 98 (skipped) and
#                99 (not asked) set to NA
#   ideo_mc_na : 8, 98 and 99 all NA
# 98/99 were previously left in ideo_mc; any such value would have become the
# maximum used by the min-max scaling below and compressed every real
# placement. They are now NA in both columns.
cces_2015_sub <- cces_2015 %>%
  mutate(year = 2015) %>%
  select(year, V101, CC15_340h) %>%
  rename(ideo_mc = CC15_340h) %>%
  mutate(ideo_mc = as.numeric(as.character(ideo_mc))) %>%
  # ideo_mc_na is computed first, from the still-unrecoded ideo_mc
  mutate(ideo_mc_na = ifelse(ideo_mc %in% c(8, 98, 99), NA, ideo_mc),
         ideo_mc = case_when(ideo_mc == 8 ~ 4,
                             ideo_mc %in% c(98, 99) ~ NA_real_,
                             TRUE ~ ideo_mc)) %>%
  rename(case_id = V101)

# Convert the 7pt to 0-1 (observed min/max, rounded to 2 decimals)
cces_2015_sub$ideo_mc <- round((cces_2015_sub$ideo_mc-min(na.omit(cces_2015_sub$ideo_mc)))/
                                 (max(na.omit(cces_2015_sub$ideo_mc))-min(na.omit(cces_2015_sub$ideo_mc))),2)

cces_2015_sub$ideo_mc_na <- round((cces_2015_sub$ideo_mc_na-min(na.omit(cces_2015_sub$ideo_mc_na)))/
                                    (max(na.omit(cces_2015_sub$ideo_mc_na))-min(na.omit(cces_2015_sub$ideo_mc_na))),2)


# 2016 ---- 
# CC16_340p - How would you rate each of the following groups - 7pt scale House Member 
# CC16_340n - house cand 1, CC16_340o - house cand 2 
# HouseCand1Incumbent - candidate 1 incumbent
# HouseCand2Incumbent - candidate 2 incumbent
# Output columns: year, case_id, ideo_mc, ideo_mc_na.
#   ideo_mc    : 8 ("not sure") counted as the midpoint (4)
#   ideo_mc_na : 8 set to NA
# Both are then min-max scaled to 0-1 using the observed minimum and maximum,
# and rounded to 2 decimals.
cces_2016_sub <- cces_2016 %>%
  mutate(
    year = 2016,
    # Pick the placement that refers to the sitting member; rows matching none
    # of the three conditions are left NA
    ideo_mc = case_when(
      HouseCand1Incumbent == 1 ~ CC16_340n,
      HouseCand2Incumbent == 1 ~ CC16_340o,
      HouseCand1Incumbent != 1 & HouseCand2Incumbent != 1 ~ CC16_340p
    ),
    ideo_mc = as.numeric(as.character(ideo_mc)),
    ideo_mc_na = ifelse(ideo_mc == 8, NA, ideo_mc),
    ideo_mc = ifelse(ideo_mc == 8, 4, ideo_mc)
  ) %>%
  select(year, case_id = V101, ideo_mc, ideo_mc_na) %>%
  # Convert the 7pt to 0-1
  mutate(
    ideo_mc = round(
      (ideo_mc - min(ideo_mc, na.rm = TRUE)) /
        diff(range(ideo_mc, na.rm = TRUE)), 2),
    ideo_mc_na = round(
      (ideo_mc_na - min(ideo_mc_na, na.rm = TRUE)) /
        diff(range(ideo_mc_na, na.rm = TRUE)), 2)
  )

# 2017 ---- 
# 2017 MC ideology: CC17_350h - "how would you rate each of the following
# individuals and groups?" 7pt scale - $CurrentHouseName
# Two versions of the respondent's placement of their House member are built:
#   ideo_mc    - "not sure" (8) recoded to the scale midpoint (4)
#   ideo_mc_na - "not sure" (8) treated as missing
# Skipped (98) and not asked (99) are set to missing in BOTH versions; if they
# were left in ideo_mc they would become the observed maximum and distort the
# 0-1 rescaling below.
cces_2017_sub <- cces_2017 %>%
  mutate(year = 2017) %>%
  select(year, V101, CC17_350h) %>%
  rename(case_id = V101, ideo_mc = CC17_350h) %>%
  mutate(ideo_mc = as.numeric(as.character(ideo_mc)),
         # 8, 98 and 99 all become NA in the "_na" version
         ideo_mc_na = ifelse(ideo_mc %in% c(8, 98, 99), NA, ideo_mc),
         # not sure as middle value; skipped / not asked stay missing
         ideo_mc = case_when(ideo_mc == 8 ~ 4,
                             ideo_mc %in% c(98, 99) ~ NA_real_,
                             TRUE ~ ideo_mc))

# Convert the 7pt to 0-1 (min-max over the observed values, rounded to 2 decimals)
cces_2017_sub <- cces_2017_sub %>%
  mutate(ideo_mc = round((ideo_mc - min(ideo_mc, na.rm = TRUE)) /
                           diff(range(ideo_mc, na.rm = TRUE)), 2),
         ideo_mc_na = round((ideo_mc_na - min(ideo_mc_na, na.rm = TRUE)) /
                              diff(range(ideo_mc_na, na.rm = TRUE)), 2))


# 2018 ---- 
# 2018 MC ideology (ideological placement items):
#   CC18_334O - $CurrentHouseName, CC18_334M - $HouseCand1Name, CC18_334N - $HouseCand2Name
# The candidate whose party equals CurrentHouseParty is treated as the sitting
# member; if neither candidate's party matches, the current-member item is used.
# NOTE(review): matching is done on party, not on name - confirm this is intended.
cces_2018_sub <- cces_2018 %>%
  mutate(year = 2018,
         case_id = caseid,
         mc_placement = case_when(
           CurrentHouseParty == HouseCand1Party ~ CC18_334M,
           CurrentHouseParty == HouseCand2Party ~ CC18_334N,
           !(CurrentHouseParty == HouseCand1Party |
               CurrentHouseParty == HouseCand2Party) ~ CC18_334O,
           TRUE ~ NA_real_
         ),
         mc_placement = as.numeric(as.character(mc_placement)),
         # "not sure" (8) as missing
         ideo_mc_na = na_if(mc_placement, 8),
         # "not sure" (8) as middle value
         ideo_mc = replace(mc_placement, which(mc_placement == 8), 4)) %>%
  select(year, case_id, ideo_mc, ideo_mc_na) %>%
  # Convert the 7pt to 0-1 (min-max over the observed values, 2 decimals)
  mutate(ideo_mc = round((ideo_mc - min(ideo_mc, na.rm = TRUE)) /
                           diff(range(ideo_mc, na.rm = TRUE)), 2),
         ideo_mc_na = round((ideo_mc_na - min(ideo_mc_na, na.rm = TRUE)) /
                              diff(range(ideo_mc_na, na.rm = TRUE)), 2))


# 2019 ---- 
# 2019 MC ideology: CC19_334i - "how would you rate each of the following
# individuals and groups?" 7pt scale - $CurrentHouseName
#   ideo_mc    - "not sure" (8) recoded to the scale midpoint (4)
#   ideo_mc_na - "not sure" (8) treated as missing
cces_2019_sub <- cces_2019 %>%
  mutate(year = 2019,
         case_id = caseid,
         mc_placement = as.numeric(as.character(CC19_334i)),
         # "not sure" (8) as middle value
         ideo_mc = replace(mc_placement, which(mc_placement == 8), 4),
         # "not sure" (8) as missing
         ideo_mc_na = na_if(mc_placement, 8)) %>%
  select(year, case_id, ideo_mc, ideo_mc_na) %>%
  # Convert the 7pt to 0-1 (min-max over the observed values, 2 decimals)
  mutate(ideo_mc = round((ideo_mc - min(ideo_mc, na.rm = TRUE)) /
                           diff(range(ideo_mc, na.rm = TRUE)), 2),
         ideo_mc_na = round((ideo_mc_na - min(ideo_mc_na, na.rm = TRUE)) /
                              diff(range(ideo_mc_na, na.rm = TRUE)), 2))


# 2020 ----
# 2020 MC ideology:
#   CC20_340k - House candidate 1, CC20_340l - House candidate 2,
#   CC20_340m - House member if not running
#   HouseCand1Name / HouseCand2Name - candidate names, CurrentHouseName - current member name
# The candidate whose name equals CurrentHouseName supplies the rating; when
# neither name matches, the current-member item (CC20_340m) is used.
cces_2020_sub <- cces_2020 %>%
  mutate(year = 2020,
         case_id = caseid,
         mc_placement = case_when(
           HouseCand1Name == CurrentHouseName ~ CC20_340k,
           HouseCand2Name == CurrentHouseName ~ CC20_340l,
           !(HouseCand1Name == CurrentHouseName |
               HouseCand2Name == CurrentHouseName) ~ CC20_340m
         ),
         mc_placement = as.numeric(as.character(mc_placement)),
         # "not sure" (8) as missing
         ideo_mc_na = na_if(mc_placement, 8),
         # "not sure" (8) as middle value
         ideo_mc = replace(mc_placement, which(mc_placement == 8), 4)) %>%
  select(year, case_id, ideo_mc_na, ideo_mc) %>%
  # Convert the 7pt to 0-1 (min-max over the observed values, 2 decimals)
  mutate(ideo_mc = round((ideo_mc - min(ideo_mc, na.rm = TRUE)) /
                           diff(range(ideo_mc, na.rm = TRUE)), 2),
         ideo_mc_na = round((ideo_mc_na - min(ideo_mc_na, na.rm = TRUE)) /
                              diff(range(ideo_mc_na, na.rm = TRUE)), 2))

## BIND YEAR, CASEID, IDEO_MC ----

# Even years: stack the per-year MC ideology subsets
mc_ideo_evens <- do.call(
  rbind,
  list(cces_2008_sub, cces_2010_sub, cces_2012_sub, cces_2014_sub,
       cces_2016_sub, cces_2018_sub, cces_2020_sub)
)

# Odd years: same stacking, then the year column is renamed to year2,
# which is the key used when joining onto the odd-year dataset
mc_ideo_odds <- do.call(
  rbind,
  list(cces_2007_sub, cces_2009_sub, cces_2011_sub, cces_2013_sub,
       cces_2015_sub, cces_2017_sub, cces_2019_sub)
) %>%
  rename(year2 = year)

## JOIN ONTO MAIN DATASETS ---- 

# Even years: attach MC ideology by survey year and respondent id
df_even3 <- left_join(df_even2, mc_ideo_evens, by = c("year", "case_id"))

# Odd years: same join, keyed on year2 instead of year
df_odd3 <- left_join(df_odd2, mc_ideo_odds, by = c("year2", "case_id"))

## GENERATE CONGRUENCE MEASURES ----

# Both self-ideo and mc-ideo are on the 0-1 scale, so congruence is measured as
# the distance between them. Adds, for the midpoint-coded and the "_na" versions:
#   incongruence_ideo2   / incongruence_ideo2_na   - absolute distance
#   incongruence_ideo2.2 / incongruence_ideo2.2_na - signed distance (MC minus
#                          respondent), i.e. whether the MC is more lib or con
# All four are rounded to 3 decimals.
.add_ideo2_incongruence <- function(df) {
  mutate(df,
         incongruence_ideo2 = round(abs(ideo_mc - ideo_self), 3),
         incongruence_ideo2_na = round(abs(ideo_mc_na - ideo_self_na), 3),
         incongruence_ideo2.2 = round(ideo_mc - ideo_self, 3),
         incongruence_ideo2.2_na = round(ideo_mc_na - ideo_self_na, 3))
}

# Evens
df_even4 <- .add_ideo2_incongruence(df_even3)

# Odds
df_odd4 <- .add_ideo2_incongruence(df_odd3)


## GENERATE TERCILE VARIABLES FOR THE TWO INCONGRUENCE MEASURES ####

# range incongruence_ideo1 = [0,2] -> 0 to .67, .68 to 1.33, 1.34 to 2
# range incongruence_ideo1.2 = [-2,2] -> not doing for directional measure
# range incongruence_ideo2 = [0,1] -> 0 to .33, .34 to .67, .68 to 1
# range incongruence_ideo2.2 = [-1,1] -> not doing for directional measure 

# Bin a numeric vector into 1/2/3 using two right-closed cut points:
#   x <= cuts[1] -> 1, cuts[1] < x <= cuts[2] -> 2, x > cuts[2] -> 3, NA stays NA.
.inc_tercile <- function(x, cuts) {
  as.numeric(cut(as.numeric(x),
                 breaks = c(-Inf, cuts, Inf),
                 labels = FALSE,
                 right = TRUE,
                 include.lowest = TRUE))
}

# Add the tercile versions of both absolute incongruence measures
# (ideo1 on [0,2] is cut at .67 / 1.33; ideo2 on [0,1] is cut at .33 / .67).
.add_inc_terciles <- function(df) {
  mutate(df,
         inc_tercile_ideo1 = .inc_tercile(incongruence_ideo1, c(.67, 1.33)),
         inc_tercile_ideo1_na = .inc_tercile(incongruence_ideo1_na, c(.67, 1.33)),
         inc_tercile_ideo2 = .inc_tercile(incongruence_ideo2, c(.33, .67)),
         inc_tercile_ideo2_na = .inc_tercile(incongruence_ideo2_na, c(.33, .67)))
}

# EVENS
df_even5 <- .add_inc_terciles(df_even4)

# ODDS
df_odd5 <- .add_inc_terciles(df_odd4)

## GENERATE PERCEIVED VS. REAL MEASURE ####

# Perceived minus real ideology:
#   perceived = respondent's placement of the MC in the CCES (ideo_mc / ideo_mc_na)
#   real      = DW-NOMINATE first dimension (nominate_dim1)
# Step 1: rescale the CCES placements to [-1,1] so they match DW-NOMINATE
df_even6 <- mutate(df_even5,
                   ideo_mc_rescale = rescale(ideo_mc, to = c(-1, 1)),
                   ideo_mc_rescale_na = rescale(ideo_mc_na, to = c(-1, 1)))

df_odd6 <- mutate(df_odd5,
                  ideo_mc_rescale = rescale(ideo_mc, to = c(-1, 1)),
                  ideo_mc_rescale_na = rescale(ideo_mc_na, to = c(-1, 1)))

# Step 2: difference between the rescaled placement and nominate_dim1
# EVENS
df_even7 <- mutate(df_even6,
                   real_perc_dif = ideo_mc_rescale - nominate_dim1,
                   real_perc_dif_na = ideo_mc_rescale_na - nominate_dim1)

# ODDS
df_odd7 <- mutate(df_odd6,
                  real_perc_dif = ideo_mc_rescale - nominate_dim1,
                  real_perc_dif_na = ideo_mc_rescale_na - nominate_dim1)

#### POLICY CONGRUENCE ####
## First combine the roll call vote data from VoteView for the MC positions ----
# Load Data
mc_bills <- list.files("./Policy Congruence Data/Voteview Data") # loading list of Voteview bills
mc_bills <- mc_bills[mc_bills != "Icon\r"] # removing icon files

# Loop over the Voteview files and merge them into one dataset (sheets_full),
# keyed on state_dist, with three columns per bill:
#   <bill>_vote (1=Y/2=N/3=NV), <bill>_type ("R"/"D"/"bipartisan"/NA), <bill>_mc_party (1=D/0=R)
# seq_along() keeps the loop from running on indices 1 and 0 when the folder is empty.
for (i in seq_along(mc_bills)) {
  bill_path <- paste0("./Policy Congruence Data/Voteview Data/", mc_bills[i])
  # Policy name = file name minus a trailing ".xls" (literal dot, end of name only)
  bill_name <- sub("\\.xls$", "", mc_bills[i])

  # read in the 2 sheets used from each Voteview dataset
  sheet1 <- read_xls(bill_path, sheet = 1)
  sheet2 <- read_xls(bill_path, sheet = 2)

  # join sheets 1 and 2 (vote matrix and member descriptions) by icpsr, id, state_abbrev, name
  sheets_1_2 <- sheet1 %>% left_join(sheet2, by = c("icpsr", "id", "state_abbrev", "name"))

  # code whether the vote was majority R or majority D
  # (party_code 200 = Republican, 100 = Democrat; V1 == 1 is a yes vote;
  #  "other" is every member of the party who did not vote yes)
  R_yes_votes <- sum(sheets_1_2$V1 == 1 & sheets_1_2$party_code == 200, na.rm = TRUE)
  R_other_votes <- sum(sheets_1_2$party_code == 200, na.rm = TRUE) - R_yes_votes
  D_yes_votes <- sum(sheets_1_2$V1 == 1 & sheets_1_2$party_code == 100, na.rm = TRUE)
  D_other_votes <- sum(sheets_1_2$party_code == 100, na.rm = TRUE) - D_yes_votes

  # Generating when a bill went majority R or D
  R_vote <- R_yes_votes > R_other_votes & D_yes_votes < D_other_votes
  D_vote <- R_yes_votes < R_other_votes & D_yes_votes > D_other_votes
  bipartisan_vote <- (R_yes_votes > R_other_votes & D_yes_votes > D_other_votes) |
    (R_yes_votes < R_other_votes & D_yes_votes < D_other_votes)

  # One label per bill: D, R, bipartisan, or NA (e.g. an exact tie within a party)
  bill_type <- if (R_vote) {
    "R"
  } else if (D_vote) {
    "D"
  } else if (bipartisan_vote) {
    "bipartisan"
  } else {
    NA_character_
  }

  # Cleaning up the data
  # NOTE(review): the label used to be wrapped in ifelse(is.na(V1), NA, ...), but
  # V1 is recoded to 1/2/3 earlier in the same mutate() so that check never fired.
  # Behaviour is kept (every row gets the bill's label) - confirm whether members
  # with a missing raw vote should instead get an NA type.
  sheets_1_2 <- sheets_1_2 %>%
    mutate(V1 = case_when(V1 == 1 ~ 1,
                          V1 == 6 ~ 2,
                          TRUE ~ 3), # recode votes 1=Y/2=N/3=NV
           state_dist = gsub("[()]", "", cqlabel), # district variable for merging
           # recode party
           mc_party = case_when(party_code == 100 ~ 1,
                                party_code == 200 ~ 0,
                                TRUE ~ NA_real_),
           vote_type = bill_type) %>%
    select(c("V1", "state_dist", "vote_type", "mc_party")) # just keep vote choice, district label, vote type, party

  # relabel vote, vote type and party variables with the name of the policy
  sheets_1_2 <- plyr::rename(
    sheets_1_2,
    c("V1" = paste(bill_name, "vote", sep = "_"),
      "vote_type" = paste(bill_name, "type", sep = "_"),
      "mc_party" = paste(bill_name, "mc_party", sep = "_"))
  )

  if (i == 1) {
    sheets_full <- sheets_1_2
  } else {
    # merge new data into full dataset; state_dist is the only shared column,
    # so name it explicitly instead of relying on a natural join
    sheets_full <- sheets_full %>% full_join(sheets_1_2, by = "state_dist")
  }
}

# NAs come from districts that were created after 2010 reapportionment/went away after reapportionment.

# Need to weight the votes ----
# Issues with greater SD within party for an issue should be weighted less 
# (an issue that is less strongly associated with a party [greater SD within party voting] has less weight in the measure of congruence) 

# Function to generate weights
# For one roll call, returns c(D_wt, R_wt): the inverse standard deviation of
# `vote` among rows with party == 1 (Democrats) and party == 0 (Republicans).
# A party with zero variance gets weight 0; a party whose variance cannot be
# computed (fewer than two non-missing votes) gets NA.
#   vote  - vector of vote codes
#   party - vector of the same length, 1 = D, 0 = R (other values / NA are ignored)
Dvote_wt <- function(vote, party){

  party_weight <- function(code) {
    vote_var <- var(na.omit(vote[party == code]))
    ifelse(vote_var != 0, 1 / sqrt(vote_var), 0)
  }

  return(c(party_weight(1), party_weight(0)))

}

# testing function: weights for the acesa roll call via Dvote_wt() ...
weight2 <- Dvote_wt(sheets_full$acesa_vote, sheets_full$acesa_mc_party)
# ... and the same two weights computed by hand (should match weight2)
D_wt_acesa <- with(sheets_full, {
  dem_var <- var(na.omit(acesa_vote[acesa_mc_party == 1]))
  ifelse(dem_var != 0, 1 / sqrt(dem_var), 0)
})
R_wt_acesa <- with(sheets_full, {
  rep_var <- var(na.omit(acesa_vote[acesa_mc_party == 0]))
  ifelse(rep_var != 0, 1 / sqrt(rep_var), 0)
})
# use this function to generate weights before creating congruence scores with mean alignments

# second check on the minwage roll call (Democratic weight only by hand)
weight_test <- Dvote_wt(sheets_full$minwage_vote, sheets_full$minwage_mc_party)
D_wt_minwage <- with(sheets_full, {
  dem_var <- var(na.omit(minwage_vote[minwage_mc_party == 1]))
  ifelse(dem_var != 0, 1 / sqrt(dem_var), 0)
})

# Merging with the CCES data ----

# Load roll call votes and fips codes ----
# votes: roll call data joined to respondents by state_dist
# fips:  state lookup joined to respondents by st
votes <- read.csv(file = "./Policy Congruence Data/roll_call_votes.csv")
fips <- read.csv(file = "./Policy Congruence Data/us-state-ansi-fips.csv")

# Create function to assess direction of incongruence ----
# Support means a vote code of 1; anything else counts as not supporting.
#   vote_type "R": MC support minus respondent support
#   vote_type "D": respondent support minus MC support
#   any other type (e.g. "bipartisan") or NA: NA
# Returns a vector of -1 / 0 / 1 / NA, one element per row.
incongruence <- function(mc_vote, resp_vote, vote_type){
  support_gap <- (mc_vote == 1) - (resp_vote == 1)
  res <- ifelse(vote_type == "R", support_gap,
                ifelse(vote_type == "D", -support_gap, NA))
  return(res)
}

## Generate congruence measures for each year ----
# 2007 ----
# 2007 policy congruence: one roll call (federal minimum wage, CC06_V3072).
# Output: one row per respondent with year, case_id and the unweighted /
# weighted congruence measures.
cces_2007_sub <- cces_2007 %>%
  # Filter out NAs for state of residence (CC06_V1002)
  filter(!is.na(CC06_V1002)) %>%
  mutate(
    year = 2007,
    # Congressional districts (CC06_V1003): at-large districts are coded as 0 in
    # CCES and 1 in the votes dataset; the first three levels are at-large,
    # skipped, and didn't ask, hence the "- 3" on the level codes
    CC06_V1003 = ifelse(CC06_V1003 == "0", 1, as.numeric(CC06_V1003) - 3),
    # district variable = state and district combined
    state_dist = paste(CC06_V1002, CC06_V1003, sep = "-"),
    case_id = caseid,
    # Respondent position on the minimum wage roll call, with NAs put as no-votes (3)
    CC06_V3072 = ifelse(is.na(CC06_V3072), 3, CC06_V3072)
  ) %>%
  # joining the votes dataset by the district
  left_join(votes, by = "state_dist") %>%
  mutate(
    # Alignment: non-votes in Congress count as agreement with skipped/don't knows
    align_minwage = CC06_V3072 == minwage_vote,
    # Alignment with non-votes in Congress coded as NAs
    align_minwage_na = ifelse(minwage_vote == 3, NA, align_minwage),
    # Direction of incongruence
    align_minwage_direction = incongruence(minwage_vote, CC06_V3072, minwage_type),
    # Within-party variance of the MC vote, taken over the joined rows
    minwage_var_dem = var(na.omit(minwage_vote[minwage_mc_party == 1])),
    minwage_var_rep = var(na.omit(minwage_vote[minwage_mc_party == 0])),
    # Weight = 1/SD within the MC's party (0 when the party voted unanimously)
    minwage_weight = case_when(
      minwage_mc_party == 1 ~ ifelse(minwage_var_dem != 0, 1 / sqrt(minwage_var_dem), 0),
      minwage_mc_party == 0 ~ ifelse(minwage_var_rep != 0, 1 / sqrt(minwage_var_rep), 0),
      TRUE ~ NA_real_
    )
  ) %>%
  rowwise() %>%
  mutate(
    # Unweighted congruence measures
    pol_congruence = mean(align_minwage, na.rm = TRUE),
    pol_congruence_na = mean(align_minwage_na, na.rm = TRUE),
    pol_congruence_direction = mean(align_minwage_direction, na.rm = TRUE),
    # Weighted congruence measures
    pol_congruence.wt = weighted.mean(align_minwage, minwage_weight, na.rm = TRUE),
    pol_congruence_na.wt = weighted.mean(align_minwage_na, minwage_weight, na.rm = TRUE),
    pol_congruence_direction.wt = weighted.mean(align_minwage_direction, minwage_weight, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  select(year, case_id, pol_congruence, pol_congruence_na, pol_congruence_direction,
         pol_congruence.wt, pol_congruence_na.wt, pol_congruence_direction.wt)


# 2008 ----
# 2008 policy congruence across seven roll calls (CC316a, b, d, e, g, h, i).
# Steps: build state_dist for each respondent, join the MC roll call data,
# compare respondent and MC positions per bill (plain, MC-non-vote-as-NA, and
# directional versions), compute a per-bill weight, then average across bills.
# Output: one row per respondent with year, case_id and the six congruence measures.
cces_2008_sub <- cces_2008 %>%
  # renaming cces state variable 
  rename(st = V251) %>%
  # mutating to numeric value 
  mutate(st = as.numeric(st)) %>%
  left_join(fips, by="st") %>% # to get state abbreviation attached to state number 
  mutate(year = 2008,
         V250 = ifelse(V250=="00", 1, as.numeric(V250)), # changing at large districts to 1 and leaving others as is 
         state_dist = paste(str_trim(stusps), V250, sep = "-"),
         case_id = V100) %>%
  # Roll call vote cces variables (all start with CC316): missing answers become 3
  mutate_at(vars(starts_with('CC316')), list(~ifelse(is.na(.), 3, .)))  %>%
  left_join(votes, by = "state_dist") %>%
  # alignment = respondent's answer equals the MC's vote code
  mutate(align_iraq2007 = CC316a==iraq2007_vote,
         align_minwage = CC316b==minwage_vote,
         align_fisa = CC316d==fisa_vote,
         align_schip2007 = CC316e==schip2007_vote,
         align_foreclosure = CC316g==foreclosure_vote,
         align_nafta = CC316h==nafta_vote,
         align_tarp = CC316i==tarp_vote,
         # generate the no vote as NA 
         align_iraq2007_na = ifelse(iraq2007_vote==3, NA, align_iraq2007),
         align_minwage_na = ifelse(minwage_vote==3, NA, align_minwage),
         align_fisa_na = ifelse(fisa_vote==3, NA, align_fisa),
         align_schip2007_na = ifelse(schip2007_vote==3, NA, align_schip2007),
         align_foreclosure_na = ifelse(foreclosure_vote==3, NA, align_foreclosure),
         align_nafta_na = ifelse(nafta_vote==3, NA, align_nafta),
         align_tarp_na = ifelse(tarp_vote==3, NA, align_tarp),
         # generate the direction 
         align_iraq2007_direction = incongruence(iraq2007_vote, CC316a, iraq2007_type),
         align_minwage_direction = incongruence(minwage_vote, CC316b, minwage_type),
         align_fisa_direction = incongruence(fisa_vote, CC316d, fisa_type),
         align_schip2007_direction = incongruence(schip2007_vote, CC316e, schip2007_type),
         align_foreclosure_direction = incongruence(foreclosure_vote, CC316g, foreclosure_type),
         align_nafta_direction = incongruence(nafta_vote, CC316h, nafta_type),
         align_tarp_direction = incongruence(tarp_vote, CC316i, tarp_type)) %>%
  # Generate weights for each roll call vote 
  # Each weight is 1/SD of the MC vote within the row's MC party (1 or 0), or 0
  # when that party's variance is zero; rows with any other party value get NA.
  # NOTE(review): the variance is taken over the joined respondent rows, not over
  # one row per MC as in Dvote_wt(sheets_full$...) - confirm this is intended.
  mutate(iraq2007_weight = case_when(iraq2007_mc_party==1 ~ ifelse(var(na.omit(iraq2007_vote[iraq2007_mc_party==1]))!=0,
                                                                   1/sqrt((var(na.omit(iraq2007_vote[iraq2007_mc_party==1])))), 0),
                                     iraq2007_mc_party==0 ~ ifelse(var(na.omit(iraq2007_vote[iraq2007_mc_party==0]))!=0,
                                                                   1/sqrt((var(na.omit(iraq2007_vote[iraq2007_mc_party==0])))), 0),
                                     TRUE ~ NA_real_)) %>%
  mutate(minwage_weight = case_when(minwage_mc_party==1 ~ ifelse(var(na.omit(minwage_vote[minwage_mc_party==1]))!=0,
                                                                 1/sqrt((var(na.omit(minwage_vote[minwage_mc_party==1])))), 0),
                                    minwage_mc_party==0 ~ ifelse(var(na.omit(minwage_vote[minwage_mc_party==0]))!=0,
                                                                 1/sqrt((var(na.omit(minwage_vote[minwage_mc_party==0])))), 0),
                                    TRUE ~ NA_real_)) %>%
  mutate(fisa_weight = case_when(fisa_mc_party==1 ~ ifelse(var(na.omit(fisa_vote[fisa_mc_party==1]))!=0,
                                                           1/sqrt((var(na.omit(fisa_vote[fisa_mc_party==1])))), 0),
                                 fisa_mc_party==0 ~ ifelse(var(na.omit(fisa_vote[fisa_mc_party==0]))!=0,
                                                           1/sqrt((var(na.omit(fisa_vote[fisa_mc_party==0])))), 0),
                                 TRUE ~ NA_real_)) %>%
  mutate(schip2007_weight = case_when(schip2007_mc_party==1 ~ ifelse(var(na.omit(schip2007_vote[schip2007_mc_party==1]))!=0,
                                                                     1/sqrt((var(na.omit(schip2007_vote[schip2007_mc_party==1])))), 0),
                                      schip2007_mc_party==0 ~ ifelse(var(na.omit(schip2007_vote[schip2007_mc_party==0]))!=0,
                                                                     1/sqrt((var(na.omit(schip2007_vote[schip2007_mc_party==0])))), 0),
                                      TRUE ~ NA_real_)) %>%
  mutate(foreclosure_weight = case_when(foreclosure_mc_party==1 ~ ifelse(var(na.omit(foreclosure_vote[foreclosure_mc_party==1]))!=0,
                                                                         1/sqrt((var(na.omit(foreclosure_vote[foreclosure_mc_party==1])))), 0),
                                        foreclosure_mc_party==0 ~ ifelse(var(na.omit(foreclosure_vote[foreclosure_mc_party==0]))!=0,
                                                                         1/sqrt((var(na.omit(foreclosure_vote[foreclosure_mc_party==0])))), 0),
                                        TRUE ~ NA_real_)) %>%
  mutate(nafta_weight = case_when(nafta_mc_party==1 ~ ifelse(var(na.omit(nafta_vote[nafta_mc_party==1]))!=0,
                                                             1/sqrt((var(na.omit(nafta_vote[nafta_mc_party==1])))), 0),
                                  nafta_mc_party==0 ~ ifelse(var(na.omit(nafta_vote[nafta_mc_party==0]))!=0,
                                                             1/sqrt((var(na.omit(nafta_vote[nafta_mc_party==0])))), 0),
                                  TRUE ~ NA_real_)) %>%
  mutate(tarp_weight = case_when(tarp_mc_party==1~ ifelse(var(na.omit(tarp_vote[tarp_mc_party==1]))!=0,
                                                          1/sqrt((var(na.omit(tarp_vote[tarp_mc_party==1])))), 0),
                                 tarp_mc_party==0 ~ ifelse(var(na.omit(tarp_vote[tarp_mc_party==0]))!=0,
                                                           1/sqrt((var(na.omit(tarp_vote[tarp_mc_party==0])))), 0),
                                 TRUE ~ NA_real_)) %>%
  # rowwise() so that mean()/weighted.mean() below run across the seven bills
  # within each respondent rather than down the columns
  rowwise() %>%
  # unweighted congruence measures 
  mutate(pol_congruence = mean(c(align_iraq2007, align_minwage, align_fisa, align_schip2007,
                                 align_foreclosure, align_nafta, align_tarp), na.rm=T),
         pol_congruence_na = mean(c(align_iraq2007_na, align_minwage_na, align_fisa_na, align_schip2007_na, 
                                    align_foreclosure_na, align_nafta_na, align_tarp_na), na.rm=T),
         pol_congruence_direction = mean(c(align_iraq2007_direction, align_minwage_direction, align_fisa_direction, align_schip2007_direction, 
                                           align_foreclosure_direction, align_nafta_direction, align_tarp_direction), na.rm=T))  %>%
  # Using weights on policy incongruence measure 
  # (the weight vector is listed in the same bill order as the alignment vector)
  mutate(pol_congruence.wt = weighted.mean(c(align_iraq2007, align_minwage, align_fisa, align_schip2007,
                                             align_foreclosure, align_nafta, align_tarp),
                                           c(iraq2007_weight, minwage_weight, fisa_weight, 
                                             schip2007_weight, foreclosure_weight, nafta_weight, tarp_weight), na.rm = T),
         pol_congruence_na.wt = weighted.mean(c(align_iraq2007_na, align_minwage_na, align_fisa_na, align_schip2007_na, 
                                                align_foreclosure_na, align_nafta_na, align_tarp_na), 
                                              c(iraq2007_weight, minwage_weight, fisa_weight, 
                                                schip2007_weight, foreclosure_weight, nafta_weight, tarp_weight), na.rm = T),
         pol_congruence_direction.wt = weighted.mean(c(align_iraq2007_direction, align_minwage_direction, align_fisa_direction, align_schip2007_direction, 
                                                       align_foreclosure_direction, align_nafta_direction, align_tarp_direction),
                                                     c(iraq2007_weight, minwage_weight, fisa_weight, 
                                                       schip2007_weight, foreclosure_weight, nafta_weight, tarp_weight), na.rm = T)) %>%
  ungroup() %>%
  # keep only the identifiers and the six congruence measures
  select(year, case_id, pol_congruence, pol_congruence_na, pol_congruence_direction,
         pol_congruence.wt, pol_congruence_na.wt, pol_congruence_direction.wt)


# 2009 ----
cces_2009_sub <- cces_2009 %>%
  rename(st = v259) %>%
  mutate(st = as.numeric(st)) %>%
  left_join(fips, by="st") %>%
  mutate(year = 2009,
         v264 = ifelse(v264=="00", 1, as.numeric(v264)),
         state_dist = paste(str_trim(stusps), v264, sep = "-"),
         case_id = v100) %>%
  mutate_at(vars(starts_with('CC09_59')), list(~ifelse(is.na(.), 3, .))) %>%
  left_join(votes, by = "state_dist") %>%
  mutate(align_ledbetter = cc09_59a==ledbetter_vote,
         align_hatecrimes = cc09_59b==hatecrimes_vote,
         align_arra = cc09_59c==arra_vote,
         align_schip2009 = cc09_59d==schip2009_vote,
         align_acesa = cc09_59e==acesa_vote,
         align_ppaca = cc09_59f==ppaca_vote,
         align_hbaca = cc09_59h==hbaca_vote,
         align_ledbetter_na = ifelse(ledbetter_vote==3, NA, align_ledbetter),
         align_hatecrimes_na = ifelse(hatecrimes_vote==3, NA, align_hatecrimes),
         align_arra_na = ifelse(arra_vote==3, NA, align_arra),
         align_schip2009_na = ifelse(schip2009_vote==3, NA, align_schip2009),
         align_acesa_na = ifelse(acesa_vote==3, NA, align_acesa),
         align_ppaca_na = ifelse(ppaca_vote==3, NA, align_acesa),
         align_hbaca_na = ifelse(hbaca_vote==3, NA, align_hbaca),
         align_ledbetter_direction = incongruence(ledbetter_vote, cc09_59a, ledbetter_type),
         align_hatecrimes_direction = incongruence(hatecrimes_vote, cc09_59b, hatecrimes_type),
         align_arra_direction = incongruence(arra_vote, cc09_59c, arra_type),
         align_schip2009_direction = incongruence(schip2009_vote, cc09_59d, schip2009_type),
         align_acesa_direction = incongruence(acesa_vote, cc09_59e, acesa_type),
         align_ppaca_direction = incongruence(ppaca_vote, cc09_59f, ppaca_type),
         align_hbaca_direction = incongruence(hbaca_vote, cc09_59h, hbaca_type)) %>%
  # Generate weights for each roll call vote 
  mutate(ledbetter_weight = case_when(ledbetter_mc_party==1 ~ ifelse(var(na.omit(ledbetter_vote[ledbetter_mc_party==1]))!=0,
                                                                     1/sqrt((var(na.omit(ledbetter_vote[ledbetter_mc_party==1])))), 0),
                                      ledbetter_mc_party==0 ~ ifelse(var(na.omit(ledbetter_vote[ledbetter_mc_party==0]))!=0,
                                                                     1/sqrt((var(na.omit(ledbetter_vote[ledbetter_mc_party==0])))), 0),
                                      TRUE ~ NA_real_)) %>%
  mutate(hatecrimes_weight = case_when(hatecrimes_mc_party==1 ~ ifelse(var(na.omit(hatecrimes_vote[hatecrimes_mc_party==1]))!=0,
                                                                       1/sqrt((var(na.omit(hatecrimes_vote[hatecrimes_mc_party==1])))), 0),
                                       hatecrimes_mc_party==0 ~ ifelse(var(na.omit(hatecrimes_vote[hatecrimes_mc_party==0]))!=0,
                                                                       1/sqrt((var(na.omit(hatecrimes_vote[hatecrimes_mc_party==0])))), 0),
                                       TRUE ~ NA_real_)) %>%
  mutate(arra_weight = case_when(arra_mc_party==1 ~ ifelse(var(na.omit(arra_vote[arra_mc_party==1]))!=0,
                                                           1/sqrt((var(na.omit(arra_vote[arra_mc_party==1])))), 0),
                                 arra_mc_party==0 ~ ifelse(var(na.omit(arra_vote[arra_mc_party==0]))!=0,
                                                           1/sqrt((var(na.omit(arra_vote[arra_mc_party==0])))), 0),
                                 TRUE ~ NA_real_)) %>%
  mutate(schip2009_weight = case_when(schip2009_mc_party==1 ~ ifelse(var(na.omit(schip2009_vote[schip2009_mc_party==1]))!=0,
                                                                     1/sqrt((var(na.omit(schip2009_vote[schip2009_mc_party==1])))), 0),
                                      schip2009_mc_party==0 ~ ifelse(var(na.omit(schip2009_vote[schip2009_mc_party==0]))!=0,
                                                                     1/sqrt((var(na.omit(schip2009_vote[schip2009_mc_party==0])))), 0),
                                      TRUE ~ NA_real_)) %>%
  mutate(acesa_weight = case_when(acesa_mc_party==1 ~ ifelse(var(na.omit(acesa_vote[acesa_mc_party==1]))!=0,
                                                             1/sqrt((var(na.omit(acesa_vote[acesa_mc_party==1])))), 0),
                                  acesa_mc_party==0 ~ ifelse(var(na.omit(acesa_vote[acesa_mc_party==0]))!=0,
                                                             1/sqrt((var(na.omit(acesa_vote[acesa_mc_party==0])))), 0),
                                  TRUE ~ NA_real_)) %>%
  mutate(ppaca_weight = case_when(ppaca_mc_party==1 ~ ifelse(var(na.omit(ppaca_vote[ppaca_mc_party==1]))!=0,
                                                             1/sqrt((var(na.omit(ppaca_vote[ppaca_mc_party==1])))), 0),
                                  ppaca_mc_party==0 ~ ifelse(var(na.omit(ppaca_vote[ppaca_mc_party==0]))!=0,
                                                             1/sqrt((var(na.omit(ppaca_vote[ppaca_mc_party==0])))), 0),
                                  TRUE ~ NA_real_)) %>%
  mutate(hbaca_weight = case_when(hbaca_mc_party==1~ ifelse(var(na.omit(hbaca_vote[hbaca_mc_party==1]))!=0,
                                                            1/sqrt((var(na.omit(hbaca_vote[hbaca_mc_party==1])))), 0),
                                  hbaca_mc_party==0 ~ ifelse(var(na.omit(hbaca_vote[hbaca_mc_party==0]))!=0,
                                                             1/sqrt((var(na.omit(hbaca_vote[hbaca_mc_party==0])))), 0),
                                  TRUE ~ NA_real_)) %>%
  rowwise() %>%
  mutate(pol_congruence = mean(c(align_ledbetter, align_hatecrimes, align_arra, align_schip2009, align_acesa, align_ppaca, align_hbaca), na.rm=T),
         pol_congruence_na = mean(c(align_ledbetter_na, align_hatecrimes_na, align_arra_na, align_schip2009_na, align_acesa_na, align_ppaca_na, align_hbaca_na), na.rm=T),
         pol_congruence_direction = mean(c(align_ledbetter_direction, align_hatecrimes_direction, align_arra_direction, align_schip2009_direction, align_acesa_direction, 
                                           align_ppaca_direction, align_hbaca_direction), na.rm=T))  %>%
  # Using weights on policy incongruence measure 
  mutate(pol_congruence.wt = weighted.mean(c(align_ledbetter, align_hatecrimes, align_arra, align_schip2009, 
                                             align_acesa, align_ppaca, align_hbaca),
                                           c(ledbetter_weight, hatecrimes_weight, arra_weight, schip2009_weight,
                                             acesa_weight, ppaca_weight, hbaca_weight), na.rm = T),
         pol_congruence_na.wt = weighted.mean(c(align_ledbetter_na, align_hatecrimes_na, align_arra_na, align_schip2009_na, align_acesa_na, 
                                                align_ppaca_na, align_hbaca_na), 
                                              c(ledbetter_weight, hatecrimes_weight, arra_weight, schip2009_weight,
                                                acesa_weight, ppaca_weight, hbaca_weight), na.rm = T),
         pol_congruence_direction.wt = weighted.mean(c(align_ledbetter_direction, align_hatecrimes_direction, align_arra_direction, 
                                                       align_schip2009_direction, align_acesa_direction, 
                                                       align_ppaca_direction, align_hbaca_direction),
                                                     c(ledbetter_weight, hatecrimes_weight, arra_weight, schip2009_weight,
                                                       acesa_weight, ppaca_weight, hbaca_weight), na.rm = T)) %>%
  ungroup() %>%
  select(year, case_id, pol_congruence, pol_congruence_na, pol_congruence_direction,
         pol_congruence.wt, pol_congruence_na.wt, pol_congruence_direction.wt)

# 2010 ----
# Respondent-level policy congruence for the 2010 CCES: join the roll-call
# data in `votes` onto each respondent by `state_dist`, compare the CC332*
# items with the matching `*_vote` columns, and average across bills.
cces_2010_sub <- cces_2010 %>%
  rename(st = V206) %>%
  mutate(st = as.numeric(st)) %>%
  left_join(fips, by = "st") %>%
  mutate(
    year = 2010,
    # District code "00" is mapped to 1 before the join key is built
    V276 = ifelse(V276 == "00", 1, as.numeric(V276)),
    state_dist = paste(str_trim(stusps), V276, sep = "-"),
    case_id = V100
  ) %>%
  # Missing survey answers are recoded to 3
  # NOTE(review): 3 is presumably the "no position" code shared with `votes` -- confirm
  mutate_at(vars(starts_with("CC332")), list(~ ifelse(is.na(.), 3, .))) %>%
  left_join(votes, by = "state_dist") %>%
  # Exact agreement between respondent answer and roll-call vote
  mutate(
    align_arra = CC332A == arra_vote,
    align_schip2009 = CC332B == schip2009_vote,
    align_acesa = CC332C == acesa_vote,
    align_ppaca = CC332D == ppaca_vote,
    align_doddfrank = CC332F == doddfrank_vote,
    align_dadt = CC332G == enddadt_vote
  ) %>%
  # Same agreement indicator, set to NA where the roll-call vote is coded 3
  mutate(
    align_arra_na = ifelse(arra_vote == 3, NA, align_arra),
    align_schip2009_na = ifelse(schip2009_vote == 3, NA, align_schip2009),
    align_acesa_na = ifelse(acesa_vote == 3, NA, align_acesa),
    align_ppaca_na = ifelse(ppaca_vote == 3, NA, align_ppaca),
    align_doddfrank_na = ifelse(doddfrank_vote == 3, NA, align_doddfrank),
    align_dadt_na = ifelse(enddadt_vote == 3, NA, align_dadt)
  ) %>%
  # Directional measure from the `incongruence()` helper (defined elsewhere in this file)
  mutate(
    align_arra_direction = incongruence(arra_vote, CC332A, arra_type),
    align_schip2009_direction = incongruence(schip2009_vote, CC332B, schip2009_type),
    align_acesa_direction = incongruence(acesa_vote, CC332C, acesa_type),
    align_ppaca_direction = incongruence(ppaca_vote, CC332D, ppaca_type),
    align_doddfrank_direction = incongruence(doddfrank_vote, CC332F, doddfrank_type),
    align_dadt_direction = incongruence(enddadt_vote, CC332G, enddadt_type)
  ) %>%
  # Roll-call weights: 1 / SD of the vote among rows sharing the same
  # `*_mc_party` value (0 when that variance is zero, NA when the party
  # code is neither 0 nor 1). Variances are taken over the joined rows.
  mutate(
    arra_weight = case_when(
      arra_mc_party == 1 ~ ifelse(
        var(na.omit(arra_vote[arra_mc_party == 1])) != 0,
        1 / sqrt(var(na.omit(arra_vote[arra_mc_party == 1]))),
        0
      ),
      arra_mc_party == 0 ~ ifelse(
        var(na.omit(arra_vote[arra_mc_party == 0])) != 0,
        1 / sqrt(var(na.omit(arra_vote[arra_mc_party == 0]))),
        0
      ),
      TRUE ~ NA_real_
    ),
    schip2009_weight = case_when(
      schip2009_mc_party == 1 ~ ifelse(
        var(na.omit(schip2009_vote[schip2009_mc_party == 1])) != 0,
        1 / sqrt(var(na.omit(schip2009_vote[schip2009_mc_party == 1]))),
        0
      ),
      schip2009_mc_party == 0 ~ ifelse(
        var(na.omit(schip2009_vote[schip2009_mc_party == 0])) != 0,
        1 / sqrt(var(na.omit(schip2009_vote[schip2009_mc_party == 0]))),
        0
      ),
      TRUE ~ NA_real_
    ),
    acesa_weight = case_when(
      acesa_mc_party == 1 ~ ifelse(
        var(na.omit(acesa_vote[acesa_mc_party == 1])) != 0,
        1 / sqrt(var(na.omit(acesa_vote[acesa_mc_party == 1]))),
        0
      ),
      acesa_mc_party == 0 ~ ifelse(
        var(na.omit(acesa_vote[acesa_mc_party == 0])) != 0,
        1 / sqrt(var(na.omit(acesa_vote[acesa_mc_party == 0]))),
        0
      ),
      TRUE ~ NA_real_
    ),
    ppaca_weight = case_when(
      ppaca_mc_party == 1 ~ ifelse(
        var(na.omit(ppaca_vote[ppaca_mc_party == 1])) != 0,
        1 / sqrt(var(na.omit(ppaca_vote[ppaca_mc_party == 1]))),
        0
      ),
      ppaca_mc_party == 0 ~ ifelse(
        var(na.omit(ppaca_vote[ppaca_mc_party == 0])) != 0,
        1 / sqrt(var(na.omit(ppaca_vote[ppaca_mc_party == 0]))),
        0
      ),
      TRUE ~ NA_real_
    ),
    doddfrank_weight = case_when(
      doddfrank_mc_party == 1 ~ ifelse(
        var(na.omit(doddfrank_vote[doddfrank_mc_party == 1])) != 0,
        1 / sqrt(var(na.omit(doddfrank_vote[doddfrank_mc_party == 1]))),
        0
      ),
      doddfrank_mc_party == 0 ~ ifelse(
        var(na.omit(doddfrank_vote[doddfrank_mc_party == 0])) != 0,
        1 / sqrt(var(na.omit(doddfrank_vote[doddfrank_mc_party == 0]))),
        0
      ),
      TRUE ~ NA_real_
    ),
    dadt_weight = case_when(
      enddadt_mc_party == 1 ~ ifelse(
        var(na.omit(enddadt_vote[enddadt_mc_party == 1])) != 0,
        1 / sqrt(var(na.omit(enddadt_vote[enddadt_mc_party == 1]))),
        0
      ),
      enddadt_mc_party == 0 ~ ifelse(
        var(na.omit(enddadt_vote[enddadt_mc_party == 0])) != 0,
        1 / sqrt(var(na.omit(enddadt_vote[enddadt_mc_party == 0]))),
        0
      ),
      TRUE ~ NA_real_
    )
  ) %>%
  # Per-respondent averages across the six bills, unweighted and weighted
  rowwise() %>%
  mutate(
    pol_congruence = mean(
      c(align_arra, align_schip2009, align_acesa, align_ppaca,
        align_doddfrank, align_dadt),
      na.rm = TRUE
    ),
    pol_congruence_na = mean(
      c(align_arra_na, align_schip2009_na, align_acesa_na, align_ppaca_na,
        align_doddfrank_na, align_dadt_na),
      na.rm = TRUE
    ),
    pol_congruence_direction = mean(
      c(align_arra_direction, align_schip2009_direction, align_acesa_direction,
        align_ppaca_direction, align_doddfrank_direction, align_dadt_direction),
      na.rm = TRUE
    ),
    pol_congruence.wt = weighted.mean(
      c(align_arra, align_schip2009, align_acesa, align_ppaca,
        align_doddfrank, align_dadt),
      c(arra_weight, schip2009_weight, acesa_weight, ppaca_weight,
        doddfrank_weight, dadt_weight),
      na.rm = TRUE
    ),
    pol_congruence_na.wt = weighted.mean(
      c(align_arra_na, align_schip2009_na, align_acesa_na, align_ppaca_na,
        align_doddfrank_na, align_dadt_na),
      c(arra_weight, schip2009_weight, acesa_weight, ppaca_weight,
        doddfrank_weight, dadt_weight),
      na.rm = TRUE
    ),
    pol_congruence_direction.wt = weighted.mean(
      c(align_arra_direction, align_schip2009_direction, align_acesa_direction,
        align_ppaca_direction, align_doddfrank_direction, align_dadt_direction),
      c(arra_weight, schip2009_weight, acesa_weight, ppaca_weight,
        doddfrank_weight, dadt_weight),
      na.rm = TRUE
    )
  ) %>%
  ungroup() %>%
  select(
    year, case_id,
    pol_congruence, pol_congruence_na, pol_congruence_direction,
    pol_congruence.wt, pol_congruence_na.wt, pol_congruence_direction.wt
  )

# 2011 ----
# no roll call votes for 112th Congress in 2011 CCES common content

# 2012 ----
# Respondent-level policy congruence for the 2012 CCES: join the roll-call
# data in `votes` onto each respondent by `state_dist`, compare the CC332*
# items with the matching `*_vote` columns, and average across bills.
cces_2012_sub <- cces_2012 %>%
  rename(st = inputstate) %>%
  mutate(st = as.numeric(st)) %>%
  left_join(fips, by = "st") %>%
  mutate(
    year = 2012,
    state_dist = paste(str_trim(stusps), as.numeric(cdid), sep = "-"),
    case_id = V101
  ) %>%
  # Missing survey answers are recoded to 3
  # NOTE(review): 3 is presumably the "no position" code shared with `votes` -- confirm
  mutate_at(vars(starts_with("CC332")), list(~ ifelse(is.na(.), 3, .))) %>%
  left_join(votes, by = "state_dist") %>%
  # Exact agreement between respondent answer and roll-call vote
  mutate(
    align_ryanbudget = CC332A == ryanbudget_vote,
    align_simpsonbowles = CC332B == simpsonbowles_vote,
    align_taxrelief2012 = CC332D == taxrelief2012_vote,
    align_koreafta = CC332F == koreafta_vote,
    align_repealaca2012 = CC332G == repealaca2012_vote
  ) %>%
  # Same agreement indicator, set to NA where the roll-call vote is coded 3
  mutate(
    align_ryanbudget_na = ifelse(ryanbudget_vote == 3, NA, align_ryanbudget),
    align_simpsonbowles_na = ifelse(simpsonbowles_vote == 3, NA, align_simpsonbowles),
    align_taxrelief2012_na = ifelse(taxrelief2012_vote == 3, NA, align_taxrelief2012),
    align_koreafta_na = ifelse(koreafta_vote == 3, NA, align_koreafta),
    align_repealaca2012_na = ifelse(repealaca2012_vote == 3, NA, align_repealaca2012)
  ) %>%
  # Directional measure from the `incongruence()` helper (defined elsewhere in this file)
  mutate(
    align_ryanbudget_direction = incongruence(ryanbudget_vote, CC332A, ryanbudget_type),
    align_simpsonbowles_direction = incongruence(simpsonbowles_vote, CC332B, simpsonbowles_type),
    align_taxrelief2012_direction = incongruence(taxrelief2012_vote, CC332D, taxrelief2012_type),
    align_koreafta_direction = incongruence(koreafta_vote, CC332F, koreafta_type),
    align_repealaca2012_direction = incongruence(repealaca2012_vote, CC332G, repealaca2012_type)
  ) %>%
  # Roll-call weights: 1 / SD of the vote among rows sharing the same
  # `*_mc_party` value (0 when that variance is zero, NA when the party
  # code is neither 0 nor 1). Variances are taken over the joined rows.
  mutate(
    ryanbudget_weight = case_when(
      ryanbudget_mc_party == 1 ~ ifelse(
        var(na.omit(ryanbudget_vote[ryanbudget_mc_party == 1])) != 0,
        1 / sqrt(var(na.omit(ryanbudget_vote[ryanbudget_mc_party == 1]))),
        0
      ),
      ryanbudget_mc_party == 0 ~ ifelse(
        var(na.omit(ryanbudget_vote[ryanbudget_mc_party == 0])) != 0,
        1 / sqrt(var(na.omit(ryanbudget_vote[ryanbudget_mc_party == 0]))),
        0
      ),
      TRUE ~ NA_real_
    ),
    simpsonbowles_weight = case_when(
      simpsonbowles_mc_party == 1 ~ ifelse(
        var(na.omit(simpsonbowles_vote[simpsonbowles_mc_party == 1])) != 0,
        1 / sqrt(var(na.omit(simpsonbowles_vote[simpsonbowles_mc_party == 1]))),
        0
      ),
      simpsonbowles_mc_party == 0 ~ ifelse(
        var(na.omit(simpsonbowles_vote[simpsonbowles_mc_party == 0])) != 0,
        1 / sqrt(var(na.omit(simpsonbowles_vote[simpsonbowles_mc_party == 0]))),
        0
      ),
      TRUE ~ NA_real_
    ),
    taxrelief2012_weight = case_when(
      taxrelief2012_mc_party == 1 ~ ifelse(
        var(na.omit(taxrelief2012_vote[taxrelief2012_mc_party == 1])) != 0,
        1 / sqrt(var(na.omit(taxrelief2012_vote[taxrelief2012_mc_party == 1]))),
        0
      ),
      taxrelief2012_mc_party == 0 ~ ifelse(
        var(na.omit(taxrelief2012_vote[taxrelief2012_mc_party == 0])) != 0,
        1 / sqrt(var(na.omit(taxrelief2012_vote[taxrelief2012_mc_party == 0]))),
        0
      ),
      TRUE ~ NA_real_
    ),
    koreafta_weight = case_when(
      koreafta_mc_party == 1 ~ ifelse(
        var(na.omit(koreafta_vote[koreafta_mc_party == 1])) != 0,
        1 / sqrt(var(na.omit(koreafta_vote[koreafta_mc_party == 1]))),
        0
      ),
      koreafta_mc_party == 0 ~ ifelse(
        var(na.omit(koreafta_vote[koreafta_mc_party == 0])) != 0,
        1 / sqrt(var(na.omit(koreafta_vote[koreafta_mc_party == 0]))),
        0
      ),
      TRUE ~ NA_real_
    ),
    repealaca2012_weight = case_when(
      repealaca2012_mc_party == 1 ~ ifelse(
        var(na.omit(repealaca2012_vote[repealaca2012_mc_party == 1])) != 0,
        1 / sqrt(var(na.omit(repealaca2012_vote[repealaca2012_mc_party == 1]))),
        0
      ),
      repealaca2012_mc_party == 0 ~ ifelse(
        var(na.omit(repealaca2012_vote[repealaca2012_mc_party == 0])) != 0,
        1 / sqrt(var(na.omit(repealaca2012_vote[repealaca2012_mc_party == 0]))),
        0
      ),
      TRUE ~ NA_real_
    )
  ) %>%
  # Per-respondent averages across the five bills, unweighted and weighted
  rowwise() %>%
  mutate(
    pol_congruence = mean(
      c(align_ryanbudget, align_simpsonbowles,
        align_taxrelief2012, align_koreafta, align_repealaca2012),
      na.rm = TRUE
    ),
    pol_congruence_na = mean(
      c(align_ryanbudget_na, align_simpsonbowles_na,
        align_taxrelief2012_na, align_koreafta_na, align_repealaca2012_na),
      na.rm = TRUE
    ),
    pol_congruence_direction = mean(
      c(align_ryanbudget_direction, align_simpsonbowles_direction,
        align_taxrelief2012_direction, align_koreafta_direction,
        align_repealaca2012_direction),
      na.rm = TRUE
    ),
    pol_congruence.wt = weighted.mean(
      c(align_ryanbudget, align_simpsonbowles,
        align_taxrelief2012, align_koreafta, align_repealaca2012),
      c(ryanbudget_weight, simpsonbowles_weight, taxrelief2012_weight,
        koreafta_weight, repealaca2012_weight),
      na.rm = TRUE
    ),
    pol_congruence_na.wt = weighted.mean(
      c(align_ryanbudget_na, align_simpsonbowles_na,
        align_taxrelief2012_na, align_koreafta_na, align_repealaca2012_na),
      c(ryanbudget_weight, simpsonbowles_weight, taxrelief2012_weight,
        koreafta_weight, repealaca2012_weight),
      na.rm = TRUE
    ),
    pol_congruence_direction.wt = weighted.mean(
      c(align_ryanbudget_direction, align_simpsonbowles_direction,
        align_taxrelief2012_direction, align_koreafta_direction,
        align_repealaca2012_direction),
      c(ryanbudget_weight, simpsonbowles_weight, taxrelief2012_weight,
        koreafta_weight, repealaca2012_weight),
      na.rm = TRUE
    )
  ) %>%
  ungroup() %>%
  select(
    year, case_id,
    pol_congruence, pol_congruence_na, pol_congruence_direction,
    pol_congruence.wt, pol_congruence_na.wt, pol_congruence_direction.wt
  )

# 2013 ----
# Respondent-level policy congruence for the 2013 CCES: join the roll-call
# data in `votes` onto each respondent by `state_dist`, compare the CC332*
# items with the matching `*_vote` columns, and average across bills.
# Output columns: year, case_id, and the six pol_congruence* measures.
cces_2013_sub <- cces_2013 %>%
  rename(st = inputstate) %>%
  mutate(st = as.numeric(st)) %>%
  left_join(fips, by = "st") %>%
  mutate(year = 2013,
         state_dist = paste(str_trim(stusps), as.numeric(cdid113), sep = "-"),
         case_id = caseid) %>%
  # Recode missing survey answers to 3. The prefix has to be "CC332" so that
  # it actually selects the CC332A / CC332C / CC332F items compared below.
  # NOTE(review): 3 is presumably the "no position" code shared with `votes` -- confirm
  mutate_at(vars(starts_with("CC332")), list(~ ifelse(is.na(.), 3, .))) %>%
  left_join(votes, by = "state_dist") %>%
  mutate(
    # Exact agreement between respondent answer and roll-call vote
    align_ban20wkab = CC332A == ban20wkab_vote,
    align_repealaca2013 = CC332C == repealaca2013_vote,
    align_vaw = CC332F == vaw_vote,
    # Same indicator, set to NA where the roll-call vote is coded 3
    align_ban20wkab_na = ifelse(ban20wkab_vote == 3, NA, align_ban20wkab),
    align_repealaca2013_na = ifelse(repealaca2013_vote == 3, NA, align_repealaca2013),
    align_vaw_na = ifelse(vaw_vote == 3, NA, align_vaw),
    # Directional measure from the `incongruence()` helper (defined elsewhere in this file)
    align_ban20wkab_direction = incongruence(ban20wkab_vote, CC332A, ban20wkab_type),
    align_repealaca2013_direction = incongruence(repealaca2013_vote, CC332C, repealaca2013_type),
    align_vaw_direction = incongruence(vaw_vote, CC332F, vaw_type)
  ) %>%
  # Generate weights for each roll call vote: 1 / SD of the vote among rows
  # sharing the same `*_mc_party` value (0 when that variance is zero, NA when
  # the party code is neither 0 nor 1). Every subset must be a logical mask
  # (`== 1` / `== 0`); indexing with the raw 0/1 codes would select by position.
  mutate(ban20wkab_weight = case_when(
    ban20wkab_mc_party == 1 ~ ifelse(var(na.omit(ban20wkab_vote[ban20wkab_mc_party == 1])) != 0,
                                     1 / sqrt(var(na.omit(ban20wkab_vote[ban20wkab_mc_party == 1]))), 0),
    ban20wkab_mc_party == 0 ~ ifelse(var(na.omit(ban20wkab_vote[ban20wkab_mc_party == 0])) != 0,
                                     1 / sqrt(var(na.omit(ban20wkab_vote[ban20wkab_mc_party == 0]))), 0),
    TRUE ~ NA_real_
  )) %>%
  mutate(repealaca2013_weight = case_when(
    repealaca2013_mc_party == 1 ~ ifelse(var(na.omit(repealaca2013_vote[repealaca2013_mc_party == 1])) != 0,
                                         1 / sqrt(var(na.omit(repealaca2013_vote[repealaca2013_mc_party == 1]))), 0),
    repealaca2013_mc_party == 0 ~ ifelse(var(na.omit(repealaca2013_vote[repealaca2013_mc_party == 0])) != 0,
                                         1 / sqrt(var(na.omit(repealaca2013_vote[repealaca2013_mc_party == 0]))), 0),
    TRUE ~ NA_real_
  )) %>%
  mutate(vaw_weight = case_when(
    vaw_mc_party == 1 ~ ifelse(var(na.omit(vaw_vote[vaw_mc_party == 1])) != 0,
                               1 / sqrt(var(na.omit(vaw_vote[vaw_mc_party == 1]))), 0),
    vaw_mc_party == 0 ~ ifelse(var(na.omit(vaw_vote[vaw_mc_party == 0])) != 0,
                               1 / sqrt(var(na.omit(vaw_vote[vaw_mc_party == 0]))), 0),
    TRUE ~ NA_real_
  )) %>%
  rowwise() %>%
  # Unweighted per-respondent averages across the three bills
  mutate(pol_congruence = mean(c(align_ban20wkab, align_repealaca2013, align_vaw), na.rm = TRUE),
         pol_congruence_na = mean(c(align_ban20wkab_na, align_repealaca2013_na, align_vaw_na), na.rm = TRUE),
         pol_congruence_direction = mean(c(align_ban20wkab_direction, align_repealaca2013_direction,
                                           align_vaw_direction), na.rm = TRUE)) %>%
  # Using weights on policy incongruence measure
  mutate(pol_congruence.wt = weighted.mean(c(align_ban20wkab, align_repealaca2013, align_vaw),
                                           c(ban20wkab_weight, repealaca2013_weight, vaw_weight), na.rm = TRUE),
         pol_congruence_na.wt = weighted.mean(c(align_ban20wkab_na, align_repealaca2013_na, align_vaw_na),
                                              c(ban20wkab_weight, repealaca2013_weight, vaw_weight), na.rm = TRUE),
         pol_congruence_direction.wt = weighted.mean(c(align_ban20wkab_direction, align_repealaca2013_direction,
                                                       align_vaw_direction),
                                                     c(ban20wkab_weight, repealaca2013_weight, vaw_weight),
                                                     na.rm = TRUE)) %>%
  ungroup() %>%
  select(year, case_id, pol_congruence, pol_congruence_na, pol_congruence_direction,
         pol_congruence.wt, pol_congruence_na.wt, pol_congruence_direction.wt)

# 2014 ----
# Respondent-level policy congruence for the 2014 CCES: join the roll-call
# data in `votes` onto each respondent by `state_dist`, compare the CC14_325*
# and CC14_331* items with the matching `*_vote` columns, and average across
# bills. Output columns: year, case_id, and the six pol_congruence* measures.
cces_2014_sub <- cces_2014 %>%
  rename(st = inputstate) %>%
  mutate(st = as.numeric(st)) %>%
  left_join(fips, by = "st") %>%
  mutate(year = 2014,
         state_dist = paste(str_trim(stusps), as.numeric(cdid), sep = "-"),
         case_id = V101) %>%
  # Recode missing survey answers to 3
  # NOTE(review): 3 is presumably the "no position" code shared with `votes` -- confirm
  mutate_at(vars(starts_with("CC14_325"), starts_with("CC14_331")), list(~ ifelse(is.na(.), 3, .))) %>%
  left_join(votes, by = "state_dist") %>%
  mutate(
    # Exact agreement between respondent answer and roll-call vote
    align_ryanbudget = CC14_325_1 == ryanbudget14_vote,
    align_taxrelief2014 = CC14_325_4 == taxrelief2014_vote,
    align_debtceiling = CC14_325_5 == debtceiling_vote,
    align_agriculture = CC14_331_1 == agriculture_vote,
    # Same indicator, set to NA where the roll-call vote is coded 3
    align_ryanbudget_na = ifelse(ryanbudget14_vote == 3, NA, align_ryanbudget),
    align_taxrelief2014_na = ifelse(taxrelief2014_vote == 3, NA, align_taxrelief2014),
    align_debtceiling_na = ifelse(debtceiling_vote == 3, NA, align_debtceiling),
    align_agriculture_na = ifelse(agriculture_vote == 3, NA, align_agriculture),
    # Directional measure from the `incongruence()` helper (defined elsewhere in this file)
    align_ryanbudget_direction = incongruence(ryanbudget14_vote, CC14_325_1, ryanbudget14_type),
    align_taxrelief2014_direction = incongruence(taxrelief2014_vote, CC14_325_4, taxrelief2014_type),
    align_debtceiling_direction = incongruence(debtceiling_vote, CC14_325_5, debtceiling_type),
    align_agriculture_direction = incongruence(agriculture_vote, CC14_331_1, agriculture_type)
  ) %>%
  # Generate weights for each roll call vote: 1 / SD of the vote among rows
  # sharing the same `*_mc_party` value (0 when that variance is zero, NA when
  # the party code is neither 0 nor 1). Every subset must be a logical mask
  # (`== 1` / `== 0`); indexing with the raw 0/1 codes would select by position.
  mutate(ryanbudget_weight = case_when(
    ryanbudget14_mc_party == 1 ~ ifelse(var(na.omit(ryanbudget14_vote[ryanbudget14_mc_party == 1])) != 0,
                                        1 / sqrt(var(na.omit(ryanbudget14_vote[ryanbudget14_mc_party == 1]))), 0),
    ryanbudget14_mc_party == 0 ~ ifelse(var(na.omit(ryanbudget14_vote[ryanbudget14_mc_party == 0])) != 0,
                                        1 / sqrt(var(na.omit(ryanbudget14_vote[ryanbudget14_mc_party == 0]))), 0),
    TRUE ~ NA_real_
  )) %>%
  mutate(taxrelief2014_weight = case_when(
    taxrelief2014_mc_party == 1 ~ ifelse(var(na.omit(taxrelief2014_vote[taxrelief2014_mc_party == 1])) != 0,
                                         1 / sqrt(var(na.omit(taxrelief2014_vote[taxrelief2014_mc_party == 1]))), 0),
    taxrelief2014_mc_party == 0 ~ ifelse(var(na.omit(taxrelief2014_vote[taxrelief2014_mc_party == 0])) != 0,
                                         1 / sqrt(var(na.omit(taxrelief2014_vote[taxrelief2014_mc_party == 0]))), 0),
    TRUE ~ NA_real_
  )) %>%
  mutate(debtceiling_weight = case_when(
    debtceiling_mc_party == 1 ~ ifelse(var(na.omit(debtceiling_vote[debtceiling_mc_party == 1])) != 0,
                                       1 / sqrt(var(na.omit(debtceiling_vote[debtceiling_mc_party == 1]))), 0),
    debtceiling_mc_party == 0 ~ ifelse(var(na.omit(debtceiling_vote[debtceiling_mc_party == 0])) != 0,
                                       1 / sqrt(var(na.omit(debtceiling_vote[debtceiling_mc_party == 0]))), 0),
    TRUE ~ NA_real_
  )) %>%
  mutate(agriculture_weight = case_when(
    agriculture_mc_party == 1 ~ ifelse(var(na.omit(agriculture_vote[agriculture_mc_party == 1])) != 0,
                                       1 / sqrt(var(na.omit(agriculture_vote[agriculture_mc_party == 1]))), 0),
    agriculture_mc_party == 0 ~ ifelse(var(na.omit(agriculture_vote[agriculture_mc_party == 0])) != 0,
                                       1 / sqrt(var(na.omit(agriculture_vote[agriculture_mc_party == 0]))), 0),
    TRUE ~ NA_real_
  )) %>%
  rowwise() %>%
  # Unweighted per-respondent averages across the four bills
  mutate(pol_congruence = mean(c(align_ryanbudget, align_taxrelief2014,
                                 align_debtceiling, align_agriculture), na.rm = TRUE),
         pol_congruence_na = mean(c(align_ryanbudget_na, align_taxrelief2014_na,
                                    align_debtceiling_na, align_agriculture_na), na.rm = TRUE),
         pol_congruence_direction = mean(c(align_ryanbudget_direction, align_taxrelief2014_direction,
                                           align_debtceiling_direction, align_agriculture_direction),
                                         na.rm = TRUE)) %>%
  # Using weights on policy incongruence measure
  mutate(pol_congruence.wt = weighted.mean(c(align_ryanbudget, align_taxrelief2014,
                                             align_debtceiling, align_agriculture),
                                           c(ryanbudget_weight, taxrelief2014_weight,
                                             debtceiling_weight, agriculture_weight), na.rm = TRUE),
         pol_congruence_na.wt = weighted.mean(c(align_ryanbudget_na, align_taxrelief2014_na,
                                                align_debtceiling_na, align_agriculture_na),
                                              c(ryanbudget_weight, taxrelief2014_weight,
                                                debtceiling_weight, agriculture_weight), na.rm = TRUE),
         pol_congruence_direction.wt = weighted.mean(c(align_ryanbudget_direction, align_taxrelief2014_direction,
                                                       align_debtceiling_direction, align_agriculture_direction),
                                                     c(ryanbudget_weight, taxrelief2014_weight,
                                                       debtceiling_weight, agriculture_weight), na.rm = TRUE)) %>%
  ungroup() %>%
  select(year, case_id, pol_congruence, pol_congruence_na, pol_congruence_direction,
         pol_congruence.wt, pol_congruence_na.wt, pol_congruence_direction.wt)

# 2015 ----
# Respondent-level policy congruence for the 2015 CCES: join the roll-call
# data in `votes` onto each respondent by `state_dist`, compare the CC15_327*
# items with the matching `*_vote` columns, and average across bills.
# Output columns: year, case_id, and the six pol_congruence* measures.
cces_2015_sub <- cces_2015 %>%
  rename(st = inputstate) %>%
  mutate(st = as.numeric(st)) %>%
  left_join(fips, by = "st") %>%
  mutate(year = 2015,
         state_dist = paste(str_trim(stusps), as.numeric(cdid), sep = "-"),
         case_id = V101) %>%
  # Recode missing survey answers to 3
  # NOTE(review): 3 is presumably the "no position" code shared with `votes` -- confirm
  mutate_at(vars(starts_with("CC15_327")), list(~ ifelse(is.na(.), 3, .))) %>%
  left_join(votes, by = "state_dist") %>%
  mutate(
    # Exact agreement between respondent answer and roll-call vote
    align_repealaca2015 = CC15_327A == repealaca2015_vote,
    align_freedomact2015 = CC15_327F2 == freedomact2015_vote,
    align_taa = CC15_327G == taa_vote,
    # Same indicator, set to NA where the roll-call vote is coded 3
    align_repealaca2015_na = ifelse(repealaca2015_vote == 3, NA, align_repealaca2015),
    align_freedomact2015_na = ifelse(freedomact2015_vote == 3, NA, align_freedomact2015),
    align_taa_na = ifelse(taa_vote == 3, NA, align_taa),
    # Directional measure from the `incongruence()` helper (defined elsewhere in this file)
    align_repealaca2015_direction = incongruence(repealaca2015_vote, CC15_327A, repealaca2015_type),
    align_freedomact2015_direction = incongruence(freedomact2015_vote, CC15_327F2, freedomact2015_type),
    align_taa_direction = incongruence(taa_vote, CC15_327G, taa_type)
  ) %>%
  # Generate weights for each roll call vote: 1 / SD of the vote among rows
  # sharing the same `*_mc_party` value (0 when that variance is zero, NA when
  # the party code is neither 0 nor 1). Every subset must be a logical mask
  # (`== 1` / `== 0`); indexing with the raw 0/1 codes would select by position.
  mutate(repealaca2015_weight = case_when(
    repealaca2015_mc_party == 1 ~ ifelse(var(na.omit(repealaca2015_vote[repealaca2015_mc_party == 1])) != 0,
                                         1 / sqrt(var(na.omit(repealaca2015_vote[repealaca2015_mc_party == 1]))), 0),
    repealaca2015_mc_party == 0 ~ ifelse(var(na.omit(repealaca2015_vote[repealaca2015_mc_party == 0])) != 0,
                                         1 / sqrt(var(na.omit(repealaca2015_vote[repealaca2015_mc_party == 0]))), 0),
    TRUE ~ NA_real_
  )) %>%
  mutate(freedomact2015_weight = case_when(
    freedomact2015_mc_party == 1 ~ ifelse(var(na.omit(freedomact2015_vote[freedomact2015_mc_party == 1])) != 0,
                                          1 / sqrt(var(na.omit(freedomact2015_vote[freedomact2015_mc_party == 1]))), 0),
    freedomact2015_mc_party == 0 ~ ifelse(var(na.omit(freedomact2015_vote[freedomact2015_mc_party == 0])) != 0,
                                          1 / sqrt(var(na.omit(freedomact2015_vote[freedomact2015_mc_party == 0]))), 0),
    TRUE ~ NA_real_
  )) %>%
  mutate(taa_weight = case_when(
    taa_mc_party == 1 ~ ifelse(var(na.omit(taa_vote[taa_mc_party == 1])) != 0,
                               1 / sqrt(var(na.omit(taa_vote[taa_mc_party == 1]))), 0),
    taa_mc_party == 0 ~ ifelse(var(na.omit(taa_vote[taa_mc_party == 0])) != 0,
                               1 / sqrt(var(na.omit(taa_vote[taa_mc_party == 0]))), 0),
    TRUE ~ NA_real_
  )) %>%
  rowwise() %>%
  # Unweighted per-respondent averages across the three bills
  mutate(pol_congruence = mean(c(align_repealaca2015, align_freedomact2015, align_taa), na.rm = TRUE),
         pol_congruence_na = mean(c(align_repealaca2015_na, align_freedomact2015_na, align_taa_na), na.rm = TRUE),
         pol_congruence_direction = mean(c(align_repealaca2015_direction, align_freedomact2015_direction,
                                           align_taa_direction), na.rm = TRUE)) %>%
  # Using weights on policy incongruence measure
  mutate(pol_congruence.wt = weighted.mean(c(align_repealaca2015, align_freedomact2015, align_taa),
                                           c(repealaca2015_weight, freedomact2015_weight, taa_weight),
                                           na.rm = TRUE),
         pol_congruence_na.wt = weighted.mean(c(align_repealaca2015_na, align_freedomact2015_na, align_taa_na),
                                              c(repealaca2015_weight, freedomact2015_weight, taa_weight),
                                              na.rm = TRUE),
         pol_congruence_direction.wt = weighted.mean(c(align_repealaca2015_direction, align_freedomact2015_direction,
                                                       align_taa_direction),
                                                     c(repealaca2015_weight, freedomact2015_weight, taa_weight),
                                                     na.rm = TRUE)) %>%
  ungroup() %>%
  select(year, case_id, pol_congruence, pol_congruence_na, pol_congruence_direction,
         pol_congruence.wt, pol_congruence_na.wt, pol_congruence_direction.wt)

# 2016 ----
# Respondent-MC roll-call congruence, 2016 CCES.
# Steps: (1) attach `stusps` from `fips` and build the "<state>-<district>" key,
# (2) join the MC roll-call columns from `votes`, (3) compare each respondent
# answer (CC16_351*) with the MC's vote on the same bill, (4) build one weight
# per bill, (5) average the per-bill indicators row by row, unweighted and weighted.
# Only year, case_id and the six summary scores are kept.
cces_2016_sub <- cces_2016 %>%
  rename(st = inputstate) %>%
  mutate(st = as.numeric(st)) %>%
  left_join(fips, by="st") %>%
  mutate(year=2016,
         state_dist = paste(str_trim(stusps), as.numeric(cdid113), sep = "-"),
         case_id = V101) %>%
  # Missing respondent answers are recoded to 3, so the `==` comparisons below
  # can only be NA when the MC's vote is NA
  mutate_at(vars(starts_with('CC16_351')), list(~ifelse(is.na(.), 3, .))) %>%
  left_join(votes, by = "state_dist") %>%
  mutate(
         # align_*: TRUE when the respondent's answer equals the MC's recorded vote
         align_freedomact2015 = CC16_351C==freedomact2015_vote,
         align_taa = CC16_351D==taa_vote,
         align_studentsuccess = CC16_351E==studentsuccess_vote,
         align_highways = CC16_351F==highways_vote,
         align_medicare = CC16_351H==medicare_vote,
         align_repealaca2015 = CC16_351I==repealaca2015_vote,
         # align_*_na: same indicator, but NA whenever the MC's vote is coded 3
         # (presumably "did not vote" -- confirm against the votes file)
         align_freedomact2015_na = ifelse(freedomact2015_vote==3, NA, align_freedomact2015),
         align_taa_na = ifelse(taa_vote==3, NA, align_taa),
         align_studentsuccess_na = ifelse(studentsuccess_vote==3, NA, align_studentsuccess),
         align_highways_na = ifelse(highways_vote==3, NA, align_highways),
         align_medicare_na = ifelse(medicare_vote==3, NA, align_medicare),
         align_repealaca2015_na = ifelse(repealaca2015_vote==3, NA, align_repealaca2015),
         # align_*_direction: scored by incongruence(), which is defined elsewhere in this file
         align_freedomact2015_direction = incongruence(freedomact2015_vote, CC16_351C, freedomact2015_type),
         align_taa_direction = incongruence(taa_vote, CC16_351D, taa_type),
         align_studentsuccess_direction = incongruence(studentsuccess_vote, CC16_351E, studentsuccess_type),
         align_highways_direction = incongruence(highways_vote, CC16_351F, highways_type),
         align_medicare_direction = incongruence(medicare_vote, CC16_351H, medicare_type),
         align_repealaca2015_direction = incongruence(repealaca2015_vote, CC16_351I, repealaca2015_type)) %>%
  # Generate weights for each roll call vote:
  # 1 / SD of the vote column among rows whose MC has the same party code (1 or 0),
  # 0 when that variance is zero, NA when the party code is neither 1 nor 0.
  # The subset must be a logical mask (`party==0`); indexing with the raw 0/1
  # party column selects by position and yields a zero variance (weight = Inf).
  mutate(freedomact2015_weight = case_when(
    freedomact2015_mc_party==1 ~ ifelse(var(na.omit(freedomact2015_vote[freedomact2015_mc_party==1]))!=0,
                                        1/sqrt((var(na.omit(freedomact2015_vote[freedomact2015_mc_party==1])))), 0),
    freedomact2015_mc_party==0 ~ ifelse(var(na.omit(freedomact2015_vote[freedomact2015_mc_party==0]))!=0,
                                        1/sqrt((var(na.omit(freedomact2015_vote[freedomact2015_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  mutate(taa_weight = case_when(
    taa_mc_party==1 ~ ifelse(var(na.omit(taa_vote[taa_mc_party==1]))!=0,
                             1/sqrt((var(na.omit(taa_vote[taa_mc_party==1])))), 0),
    taa_mc_party==0 ~ ifelse(var(na.omit(taa_vote[taa_mc_party==0]))!=0,
                             1/sqrt((var(na.omit(taa_vote[taa_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  mutate(studentsuccess_weight = case_when(
    studentsuccess_mc_party==1 ~ ifelse(var(na.omit(studentsuccess_vote[studentsuccess_mc_party==1]))!=0,
                                        1/sqrt((var(na.omit(studentsuccess_vote[studentsuccess_mc_party==1])))), 0),
    studentsuccess_mc_party==0 ~ ifelse(var(na.omit(studentsuccess_vote[studentsuccess_mc_party==0]))!=0,
                                        1/sqrt((var(na.omit(studentsuccess_vote[studentsuccess_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  mutate(highways_weight = case_when(
    highways_mc_party==1 ~ ifelse(var(na.omit(highways_vote[highways_mc_party==1]))!=0,
                                  1/sqrt((var(na.omit(highways_vote[highways_mc_party==1])))), 0),
    highways_mc_party==0 ~ ifelse(var(na.omit(highways_vote[highways_mc_party==0]))!=0,
                                  1/sqrt((var(na.omit(highways_vote[highways_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  mutate(medicare_weight = case_when(
    medicare_mc_party==1 ~ ifelse(var(na.omit(medicare_vote[medicare_mc_party==1]))!=0,
                                  1/sqrt((var(na.omit(medicare_vote[medicare_mc_party==1])))), 0),
    medicare_mc_party==0 ~ ifelse(var(na.omit(medicare_vote[medicare_mc_party==0]))!=0,
                                  1/sqrt((var(na.omit(medicare_vote[medicare_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  mutate(repealaca2015_weight = case_when(
    repealaca2015_mc_party==1 ~ ifelse(var(na.omit(repealaca2015_vote[repealaca2015_mc_party==1]))!=0,
                                       1/sqrt((var(na.omit(repealaca2015_vote[repealaca2015_mc_party==1])))), 0),
    repealaca2015_mc_party==0 ~ ifelse(var(na.omit(repealaca2015_vote[repealaca2015_mc_party==0]))!=0,
                                       1/sqrt((var(na.omit(repealaca2015_vote[repealaca2015_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  # Row-wise from here on: each summary is computed across the bills of one respondent
  rowwise() %>%
  mutate(pol_congruence = mean(c(align_freedomact2015, align_taa, align_studentsuccess, align_highways, align_medicare, align_repealaca2015), na.rm=TRUE),
         pol_congruence_na = mean(c(align_freedomact2015_na, align_taa_na, align_studentsuccess_na, align_highways_na, align_medicare_na, align_repealaca2015_na), na.rm=TRUE),
         pol_congruence_direction = mean(c(align_freedomact2015_direction, align_taa_direction, align_studentsuccess_direction, align_highways_direction, align_medicare_direction,
                                           align_repealaca2015_direction), na.rm=TRUE)) %>%
  # Using weights on policy incongruence measure.
  # The weight vector follows the same bill order as the indicator vector,
  # so the first weight is the freedomact2015 one.
  mutate(pol_congruence.wt = weighted.mean(c(align_freedomact2015, align_taa, align_studentsuccess, align_highways, 
                                             align_medicare, align_repealaca2015),
                                           c(freedomact2015_weight, taa_weight, studentsuccess_weight, highways_weight,
                                             medicare_weight, repealaca2015_weight), na.rm = TRUE),
         pol_congruence_na.wt = weighted.mean(c(align_freedomact2015_na, align_taa_na, align_studentsuccess_na, 
                                                align_highways_na, align_medicare_na, align_repealaca2015_na), 
                                              c(freedomact2015_weight, taa_weight, studentsuccess_weight, highways_weight,
                                                medicare_weight, repealaca2015_weight), na.rm = TRUE),
         pol_congruence_direction.wt = weighted.mean(c(align_freedomact2015_direction, align_taa_direction, align_studentsuccess_direction, 
                                                       align_highways_direction, align_medicare_direction,align_repealaca2015_direction),
                                                     c(freedomact2015_weight, taa_weight, studentsuccess_weight, highways_weight,
                                                       medicare_weight, repealaca2015_weight), na.rm = TRUE)) %>%
  ungroup() %>%
  select(year, case_id, pol_congruence, pol_congruence_na, pol_congruence_direction,
         pol_congruence.wt, pol_congruence_na.wt, pol_congruence_direction.wt)

# 2017 ----
# Respondent-MC roll-call congruence, 2017 CCES.
# Steps: (1) attach `stusps` from `fips` and build the "<state>-<district>" key,
# (2) join the MC roll-call columns from `votes`, (3) compare each respondent
# answer (CC17_340*) with the MC's vote on the same bill, (4) build one weight
# per bill, (5) average the per-bill indicators row by row, unweighted and weighted.
# Only year, case_id and the six summary scores are kept.
cces_2017_sub <- cces_2017 %>%
  rename(st = inputstate) %>%
  mutate(st = as.numeric(st)) %>%
  left_join(fips, by="st") %>%
  mutate(year=2017,
         state_dist = paste(str_trim(stusps), as.numeric(cdid115), sep = "-"),
         case_id = V101) %>%
  # Missing respondent answers are recoded to 3, so the `==` comparisons below
  # can only be NA when the MC's vote is NA
  mutate_at(vars(starts_with('CC17_340')), list(~ifelse(is.na(.), 3, .))) %>%
  left_join(votes, by = "state_dist") %>%
  mutate(
         # align_*: TRUE when the respondent's answer equals the MC's recorded vote
         align_ahca = CC17_340C==ahca_vote,
         align_choice = CC17_340D==choice_vote,
         align_kateslaw = CC17_340E==kateslaw_vote,
         align_sanctions = CC17_340F==sanctions_vote,
         align_sanctuary = CC17_340G==sanctuary_vote,
         align_appropriations2017 = CC17_340I==appropriations2017_vote,
         # align_*_na: same indicator, but NA whenever the MC's vote is coded 3
         # (presumably "did not vote" -- confirm against the votes file)
         align_ahca_na = ifelse(ahca_vote==3, NA, align_ahca),
         align_choice_na = ifelse(choice_vote==3, NA, align_choice),
         align_kateslaw_na = ifelse(kateslaw_vote==3, NA, align_kateslaw),
         align_sanctions_na = ifelse(sanctions_vote==3, NA, align_sanctions),
         align_sanctuary_na = ifelse(sanctuary_vote==3, NA, align_sanctuary),
         align_appropriations2017_na = ifelse(appropriations2017_vote==3, NA, align_appropriations2017),
         # align_*_direction: scored by incongruence(), which is defined elsewhere in this file
         align_ahca_direction = incongruence(ahca_vote, CC17_340C, ahca_type),
         align_choice_direction = incongruence(choice_vote, CC17_340D, choice_type),
         align_kateslaw_direction = incongruence(kateslaw_vote, CC17_340E, kateslaw_type),
         align_sanctions_direction = incongruence(sanctions_vote, CC17_340F, sanctions_type),
         align_sanctuary_direction = incongruence(sanctuary_vote, CC17_340G, sanctuary_type),
         align_appropriations2017_direction = incongruence(appropriations2017_vote, CC17_340I, appropriations2017_type))%>%
  # Generate weights for each roll call vote:
  # 1 / SD of the vote column among rows whose MC has the same party code (1 or 0),
  # 0 when that variance is zero, NA when the party code is neither 1 nor 0.
  # The subset must be a logical mask (`party==0`); indexing with the raw 0/1
  # party column selects by position and yields a zero variance (weight = Inf).
  mutate(ahca_weight = case_when(
    ahca_mc_party==1 ~ ifelse(var(na.omit(ahca_vote[ahca_mc_party==1]))!=0,
                              1/sqrt((var(na.omit(ahca_vote[ahca_mc_party==1])))), 0),
    ahca_mc_party==0 ~ ifelse(var(na.omit(ahca_vote[ahca_mc_party==0]))!=0,
                              1/sqrt((var(na.omit(ahca_vote[ahca_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  mutate(choice_weight = case_when(
    choice_mc_party==1 ~ ifelse(var(na.omit(choice_vote[choice_mc_party==1]))!=0,
                                1/sqrt((var(na.omit(choice_vote[choice_mc_party==1])))), 0),
    choice_mc_party==0 ~ ifelse(var(na.omit(choice_vote[choice_mc_party==0]))!=0,
                                1/sqrt((var(na.omit(choice_vote[choice_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  mutate(kateslaw_weight = case_when(
    kateslaw_mc_party==1 ~ ifelse(var(na.omit(kateslaw_vote[kateslaw_mc_party==1]))!=0,
                                  1/sqrt((var(na.omit(kateslaw_vote[kateslaw_mc_party==1])))), 0),
    kateslaw_mc_party==0 ~ ifelse(var(na.omit(kateslaw_vote[kateslaw_mc_party==0]))!=0,
                                  1/sqrt((var(na.omit(kateslaw_vote[kateslaw_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  mutate(sanctions_weight = case_when(
    sanctions_mc_party==1 ~ ifelse(var(na.omit(sanctions_vote[sanctions_mc_party==1]))!=0,
                                   1/sqrt((var(na.omit(sanctions_vote[sanctions_mc_party==1])))), 0),
    sanctions_mc_party==0 ~ ifelse(var(na.omit(sanctions_vote[sanctions_mc_party==0]))!=0,
                                   1/sqrt((var(na.omit(sanctions_vote[sanctions_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  mutate(sanctuary_weight = case_when(
    sanctuary_mc_party==1 ~ ifelse(var(na.omit(sanctuary_vote[sanctuary_mc_party==1]))!=0,
                                   1/sqrt((var(na.omit(sanctuary_vote[sanctuary_mc_party==1])))), 0),
    sanctuary_mc_party==0 ~ ifelse(var(na.omit(sanctuary_vote[sanctuary_mc_party==0]))!=0,
                                   1/sqrt((var(na.omit(sanctuary_vote[sanctuary_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  mutate(appropriations2017_weight = case_when(
    appropriations2017_mc_party==1 ~ ifelse(var(na.omit(appropriations2017_vote[appropriations2017_mc_party==1]))!=0,
                                            1/sqrt((var(na.omit(appropriations2017_vote[appropriations2017_mc_party==1])))), 0),
    appropriations2017_mc_party==0 ~ ifelse(var(na.omit(appropriations2017_vote[appropriations2017_mc_party==0]))!=0,
                                            1/sqrt((var(na.omit(appropriations2017_vote[appropriations2017_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  # Row-wise from here on: each summary is computed across the bills of one respondent
  rowwise() %>%
  mutate(pol_congruence = mean(c(align_ahca, align_choice, align_kateslaw, align_sanctions, align_sanctuary, align_appropriations2017), na.rm=TRUE),
         pol_congruence_na = mean(c(align_ahca_na, align_choice_na, align_kateslaw_na, align_sanctions_na, align_sanctuary_na, align_appropriations2017_na), na.rm=TRUE),
         pol_congruence_direction = mean(c(align_ahca_direction, align_choice_direction, align_kateslaw_direction, align_sanctions_direction, align_sanctuary_direction, align_appropriations2017_direction), na.rm=TRUE)) %>%
  # Using weights on policy incongruence measure
  # (weights are listed in the same bill order as the indicators)
  mutate(pol_congruence.wt = weighted.mean(c(align_ahca, align_choice, align_kateslaw, align_sanctions, 
                                             align_sanctuary, align_appropriations2017),
                                           c(ahca_weight, choice_weight, kateslaw_weight, sanctions_weight,
                                             sanctuary_weight, appropriations2017_weight), na.rm = TRUE),
         pol_congruence_na.wt = weighted.mean(c(align_ahca_na, align_choice_na, align_kateslaw_na, align_sanctions_na, 
                                                align_sanctuary_na, align_appropriations2017_na), 
                                              c(ahca_weight, choice_weight, kateslaw_weight, sanctions_weight,
                                                sanctuary_weight, appropriations2017_weight), na.rm = TRUE),
         pol_congruence_direction.wt = weighted.mean(c(align_ahca_direction, align_choice_direction, align_kateslaw_direction, 
                                                       align_sanctions_direction, align_sanctuary_direction, align_appropriations2017_direction),
                                                     c(ahca_weight, choice_weight, kateslaw_weight, sanctions_weight,
                                                       sanctuary_weight, appropriations2017_weight), na.rm = TRUE)) %>%
  ungroup() %>%
  select(year, case_id, pol_congruence, pol_congruence_na, pol_congruence_direction,
         pol_congruence.wt, pol_congruence_na.wt, pol_congruence_direction.wt)

# 2018 ----
# Respondent-MC roll-call congruence, 2018 CCES.
# Two survey items (CC18_328d, CC18_328e) are both scored against the single
# `sanctions_vote` roll call, so the same weight is applied to each of them.
# Only year, case_id and the six summary scores are kept.
cces_2018_sub <- cces_2018 %>%
  rename(st = inputstate) %>%
  mutate(st = as.numeric(st)) %>%
  left_join(fips, by="st") %>%
  mutate(year=2018,
         state_dist = paste(str_trim(stusps), as.numeric(cdid115), sep = "-"),
         case_id = caseid) %>%
  # Missing respondent answers are recoded to 3, so the `==` comparisons below
  # can only be NA when the MC's vote is NA
  mutate_at(vars(starts_with('CC18_328')), list(~ifelse(is.na(.), 3, .))) %>%
  left_join(votes, by = "state_dist") %>%
  mutate(
         # align_*: TRUE when the respondent's answer equals the MC's recorded vote
         align_sanctions1 = CC18_328d==sanctions_vote,
         align_sanctions2 = CC18_328e==sanctions_vote, # these items ask about different components of the same bill
         # align_*_na: same indicator, but NA whenever the MC's vote is coded 3
         # (presumably "did not vote" -- confirm against the votes file)
         align_sanctions1_na = ifelse(sanctions_vote==3, NA, align_sanctions1),
         align_sanctions2_na = ifelse(sanctions_vote==3, NA, align_sanctions2),
         # align_*_direction: scored by incongruence(), which is defined elsewhere in this file
         align_sanctions_direction1 = incongruence(sanctions_vote, CC18_328d, sanctions_type),
         align_sanctions_direction2 = incongruence(sanctions_vote, CC18_328e, sanctions_type))  %>%
  # Generate weights for each roll call vote:
  # 1 / SD of the vote column among rows whose MC has the same party code (1 or 0),
  # 0 when that variance is zero, NA when the party code is neither 1 nor 0.
  # The subset must be a logical mask (`party==0`); indexing with the raw 0/1
  # party column selects by position and yields a zero variance (weight = Inf).
  mutate(sanctions_weight = case_when(
    sanctions_mc_party==1 ~ ifelse(var(na.omit(sanctions_vote[sanctions_mc_party==1]))!=0,
                                   1/sqrt((var(na.omit(sanctions_vote[sanctions_mc_party==1])))), 0),
    sanctions_mc_party==0 ~ ifelse(var(na.omit(sanctions_vote[sanctions_mc_party==0]))!=0,
                                   1/sqrt((var(na.omit(sanctions_vote[sanctions_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  # Row-wise from here on: each summary is computed across the items of one respondent
  rowwise() %>%
  mutate(pol_congruence = mean(c(align_sanctions1, align_sanctions2), na.rm=TRUE),
         pol_congruence_na = mean(c(align_sanctions1_na, align_sanctions2_na), na.rm=TRUE),
         pol_congruence_direction = mean(c(align_sanctions_direction1, align_sanctions_direction2), na.rm=TRUE)) %>%
  # Using weights on policy incongruence measure 
  mutate(pol_congruence.wt = weighted.mean(c(align_sanctions1, align_sanctions2),
                                           c(sanctions_weight, sanctions_weight), na.rm = TRUE),
         pol_congruence_na.wt = weighted.mean(c(align_sanctions1_na, align_sanctions2_na), 
                                              c(sanctions_weight, sanctions_weight), na.rm = TRUE),
         pol_congruence_direction.wt = weighted.mean(c(align_sanctions_direction1, align_sanctions_direction2),
                                                     c(sanctions_weight, sanctions_weight), na.rm = TRUE)) %>%
  ungroup() %>%
  select(year, case_id, pol_congruence, pol_congruence_na, pol_congruence_direction,
         pol_congruence.wt, pol_congruence_na.wt, pol_congruence_direction.wt)

# 2019 ----
# Respondent-MC roll-call congruence, 2019 CCES.
# Steps: (1) attach `stusps` from `fips` and build the "<state>-<district>" key,
# (2) join the MC roll-call columns from `votes`, (3) compare each respondent
# answer (CC19_328*) with the MC's vote on the same bill, (4) build one weight
# per bill, (5) average the per-bill indicators row by row, unweighted and weighted.
# Only year, case_id and the six summary scores are kept.
cces_2019_sub <- cces_2019 %>%
  rename(st = inputstate) %>%
  mutate(st = as.numeric(st)) %>%
  left_join(fips, by="st") %>%
  mutate(year=2019,
         state_dist = paste(str_trim(stusps), as.numeric(cdid116), sep = "-"),
         case_id = caseid) %>%
  # Missing respondent answers are recoded to 3, so the `==` comparisons below
  # can only be NA when the MC's vote is NA
  mutate_at(vars(starts_with('CC19_328')), list(~ifelse(is.na(.), 3, .))) %>%
  left_join(votes, by = "state_dist") %>%
  mutate(
         # align_*: TRUE when the respondent's answer equals the MC's recorded vote
         align_equality = CC19_328a==equality_vote,
         align_minwage19 = CC19_328b==minwage19_vote,
         align_paycheck = CC19_328d==paycheck_vote,
         align_promise = CC19_328e==promise_vote,
         # align_*_na: same indicator, but NA whenever the MC's vote is coded 3
         # (presumably "did not vote" -- confirm against the votes file)
         align_equality_na = ifelse(equality_vote==3, NA, align_equality),
         align_minwage19_na = ifelse(minwage19_vote==3, NA, align_minwage19),
         align_paycheck_na = ifelse(paycheck_vote==3, NA, align_paycheck),
         align_promise_na = ifelse(promise_vote==3, NA, align_promise),
         # align_*_direction: scored by incongruence(), which is defined elsewhere in this file
         align_equality_direction = incongruence(equality_vote, CC19_328a, equality_type),
         align_minwage19_direction = incongruence(minwage19_vote, CC19_328b, minwage19_type),
         align_paycheck_direction = incongruence(paycheck_vote, CC19_328d, paycheck_type),
         align_promise_direction = incongruence(promise_vote, CC19_328e, promise_type)) %>%
  # Generate weights for each roll call vote:
  # 1 / SD of the vote column among rows whose MC has the same party code (1 or 0),
  # 0 when that variance is zero, NA when the party code is neither 1 nor 0.
  # The subset must be a logical mask (`party==0`); indexing with the raw 0/1
  # party column selects by position and yields a zero variance (weight = Inf).
  mutate(equality_weight = case_when(
    equality_mc_party==1 ~ ifelse(var(na.omit(equality_vote[equality_mc_party==1]))!=0,
                                  1/sqrt((var(na.omit(equality_vote[equality_mc_party==1])))), 0),
    equality_mc_party==0 ~ ifelse(var(na.omit(equality_vote[equality_mc_party==0]))!=0,
                                  1/sqrt((var(na.omit(equality_vote[equality_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  mutate(minwage19_weight = case_when(
    minwage19_mc_party==1 ~ ifelse(var(na.omit(minwage19_vote[minwage19_mc_party==1]))!=0,
                                   1/sqrt((var(na.omit(minwage19_vote[minwage19_mc_party==1])))), 0),
    minwage19_mc_party==0 ~ ifelse(var(na.omit(minwage19_vote[minwage19_mc_party==0]))!=0,
                                   1/sqrt((var(na.omit(minwage19_vote[minwage19_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  mutate(paycheck_weight = case_when(
    paycheck_mc_party==1 ~ ifelse(var(na.omit(paycheck_vote[paycheck_mc_party==1]))!=0,
                                  1/sqrt((var(na.omit(paycheck_vote[paycheck_mc_party==1])))), 0),
    paycheck_mc_party==0 ~ ifelse(var(na.omit(paycheck_vote[paycheck_mc_party==0]))!=0,
                                  1/sqrt((var(na.omit(paycheck_vote[paycheck_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  mutate(promise_weight = case_when(
    promise_mc_party==1 ~ ifelse(var(na.omit(promise_vote[promise_mc_party==1]))!=0,
                                 1/sqrt((var(na.omit(promise_vote[promise_mc_party==1])))), 0),
    promise_mc_party==0 ~ ifelse(var(na.omit(promise_vote[promise_mc_party==0]))!=0,
                                 1/sqrt((var(na.omit(promise_vote[promise_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  # Row-wise from here on: each summary is computed across the bills of one respondent
  rowwise() %>%
  mutate(pol_congruence = mean(c(align_equality, align_minwage19, align_paycheck, align_promise), na.rm=TRUE),
         pol_congruence_na = mean(c(align_equality_na, align_minwage19_na, align_paycheck_na, align_promise_na), na.rm=TRUE),
         pol_congruence_direction = mean(c(align_equality_direction, align_minwage19_direction, align_paycheck_direction, align_promise_direction), na.rm=TRUE)) %>%
  # Using weights on policy incongruence measure
  # (weights are listed in the same bill order as the indicators)
  mutate(pol_congruence.wt = weighted.mean(c(align_equality, align_minwage19, align_paycheck, align_promise),
                                           c(equality_weight, minwage19_weight, paycheck_weight, promise_weight), na.rm = TRUE),
         pol_congruence_na.wt = weighted.mean(c(align_equality_na, align_minwage19_na, align_paycheck_na, align_promise_na), 
                                              c(equality_weight, minwage19_weight, paycheck_weight, promise_weight), na.rm = TRUE),
         pol_congruence_direction.wt = weighted.mean(c(align_equality_direction, align_minwage19_direction, 
                                                       align_paycheck_direction, align_promise_direction),
                                                     c(equality_weight, minwage19_weight, paycheck_weight, promise_weight), na.rm = TRUE)) %>%
  ungroup() %>%
  select(year, case_id, pol_congruence, pol_congruence_na, pol_congruence_direction,
         pol_congruence.wt, pol_congruence_na.wt, pol_congruence_direction.wt)

# 2020 ----
# Respondent-MC roll-call congruence, 2020 CCES.
# Steps: (1) attach `stusps` from `fips` and build the "<state>-<district>" key,
# (2) join the MC roll-call columns from `votes`, (3) compare each respondent
# answer (CC20_350*, CC20_351*) with the MC's vote on the same bill, (4) build
# one weight per bill, (5) average the per-bill indicators row by row,
# unweighted and weighted.
# Only year, case_id and the six summary scores are kept.
cces_2020_sub <- cces_2020 %>%
  rename(st = inputstate) %>%
  mutate(st = as.numeric(st)) %>%
  left_join(fips, by = "st") %>%
  mutate(year = 2020,
         state_dist = paste(str_trim(stusps), as.numeric(cdid116), sep = "-"),
         case_id = caseid) %>%
  # Missing respondent answers are recoded to 3 in every column whose name
  # starts with CC20_35, so the `==` comparisons below can only be NA when the
  # MC's vote is NA
  mutate_at(vars(starts_with('CC20_35')), list(~ifelse(is.na(.), 3, .))) %>%
  left_join(votes, by = "state_dist") %>%
  mutate(
         # align_*: TRUE when the respondent's answer equals the MC's recorded vote
         align_equality = CC20_350a==equality_vote,
         align_minwage19 = CC20_350b==minwage19_vote,
         align_paycheck = CC20_350d==paycheck_vote,
         align_promise = CC20_350e==promise_vote,
         align_impeach_abuse = CC20_350f==impeach_abuse_vote,
         align_impeach_obstruction = CC20_350g==impeach_obstruction_vote,
         align_cares = CC20_351a==cares_vote,
         align_heroes = CC20_351b==heroes_vote,
         # align_*_na: same indicator, but NA whenever the MC's vote is coded 3
         # (presumably "did not vote" -- confirm against the votes file)
         align_equality_na = ifelse(equality_vote==3, NA, align_equality),
         align_minwage19_na = ifelse(minwage19_vote==3, NA, align_minwage19),
         align_paycheck_na = ifelse(paycheck_vote==3, NA, align_paycheck),
         align_promise_na = ifelse(promise_vote==3, NA, align_promise),
         align_impeach_abuse_na = ifelse(impeach_abuse_vote==3, NA, align_impeach_abuse),
         align_impeach_obstruction_na = ifelse(impeach_obstruction_vote==3, NA, align_impeach_obstruction),
         align_cares_na = ifelse(cares_vote==3, NA, align_cares),
         align_heroes_na = ifelse(heroes_vote==3, NA, align_heroes),
         # align_*_direction: scored by incongruence(), which is defined elsewhere in this file
         align_equality_direction = incongruence(equality_vote, CC20_350a, equality_type),
         align_minwage19_direction = incongruence(minwage19_vote, CC20_350b, minwage19_type),
         align_paycheck_direction = incongruence(paycheck_vote, CC20_350d, paycheck_type),
         align_promise_direction = incongruence(promise_vote, CC20_350e, promise_type),
         align_impeach_abuse_direction = incongruence(impeach_abuse_vote, CC20_350f, impeach_abuse_type),
         align_impeach_obstruction_direction = incongruence(impeach_obstruction_vote, CC20_350g, impeach_obstruction_type),
         align_cares_direction = incongruence(cares_vote, CC20_351a, cares_type),
         align_heroes_direction = incongruence(heroes_vote, CC20_351b, heroes_type)) %>%
  # Generate weights for each roll call vote:
  # 1 / SD of the vote column among rows whose MC has the same party code (1 or 0),
  # 0 when that variance is zero, NA when the party code is neither 1 nor 0.
  # The subset must be a logical mask (`party==0`); indexing with the raw 0/1
  # party column selects by position and yields a zero variance (weight = Inf).
  mutate(equality_weight = case_when(
    equality_mc_party==1 ~ ifelse(var(na.omit(equality_vote[equality_mc_party==1]))!=0,
                                  1/sqrt((var(na.omit(equality_vote[equality_mc_party==1])))), 0),
    equality_mc_party==0 ~ ifelse(var(na.omit(equality_vote[equality_mc_party==0]))!=0,
                                  1/sqrt((var(na.omit(equality_vote[equality_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  mutate(minwage19_weight = case_when(
    minwage19_mc_party==1 ~ ifelse(var(na.omit(minwage19_vote[minwage19_mc_party==1]))!=0,
                                   1/sqrt((var(na.omit(minwage19_vote[minwage19_mc_party==1])))), 0),
    minwage19_mc_party==0 ~ ifelse(var(na.omit(minwage19_vote[minwage19_mc_party==0]))!=0,
                                   1/sqrt((var(na.omit(minwage19_vote[minwage19_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  mutate(paycheck_weight = case_when(
    paycheck_mc_party==1 ~ ifelse(var(na.omit(paycheck_vote[paycheck_mc_party==1]))!=0,
                                  1/sqrt((var(na.omit(paycheck_vote[paycheck_mc_party==1])))), 0),
    paycheck_mc_party==0 ~ ifelse(var(na.omit(paycheck_vote[paycheck_mc_party==0]))!=0,
                                  1/sqrt((var(na.omit(paycheck_vote[paycheck_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  mutate(promise_weight = case_when(
    promise_mc_party==1 ~ ifelse(var(na.omit(promise_vote[promise_mc_party==1]))!=0,
                                 1/sqrt((var(na.omit(promise_vote[promise_mc_party==1])))), 0),
    promise_mc_party==0 ~ ifelse(var(na.omit(promise_vote[promise_mc_party==0]))!=0,
                                 1/sqrt((var(na.omit(promise_vote[promise_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  mutate(impeach_abuse_weight = case_when(
    impeach_abuse_mc_party==1 ~ ifelse(var(na.omit(impeach_abuse_vote[impeach_abuse_mc_party==1]))!=0,
                                       1/sqrt((var(na.omit(impeach_abuse_vote[impeach_abuse_mc_party==1])))), 0),
    impeach_abuse_mc_party==0 ~ ifelse(var(na.omit(impeach_abuse_vote[impeach_abuse_mc_party==0]))!=0,
                                       1/sqrt((var(na.omit(impeach_abuse_vote[impeach_abuse_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  mutate(impeach_obstruction_weight = case_when(
    impeach_obstruction_mc_party==1 ~ ifelse(var(na.omit(impeach_obstruction_vote[impeach_obstruction_mc_party==1]))!=0,
                                             1/sqrt((var(na.omit(impeach_obstruction_vote[impeach_obstruction_mc_party==1])))), 0),
    impeach_obstruction_mc_party==0 ~ ifelse(var(na.omit(impeach_obstruction_vote[impeach_obstruction_mc_party==0]))!=0,
                                             1/sqrt((var(na.omit(impeach_obstruction_vote[impeach_obstruction_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  mutate(cares_weight = case_when(
    cares_mc_party==1 ~ ifelse(var(na.omit(cares_vote[cares_mc_party==1]))!=0,
                               1/sqrt((var(na.omit(cares_vote[cares_mc_party==1])))), 0),
    cares_mc_party==0 ~ ifelse(var(na.omit(cares_vote[cares_mc_party==0]))!=0,
                               1/sqrt((var(na.omit(cares_vote[cares_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  mutate(heroes_weight = case_when(
    heroes_mc_party==1 ~ ifelse(var(na.omit(heroes_vote[heroes_mc_party==1]))!=0,
                                1/sqrt((var(na.omit(heroes_vote[heroes_mc_party==1])))), 0),
    heroes_mc_party==0 ~ ifelse(var(na.omit(heroes_vote[heroes_mc_party==0]))!=0,
                                1/sqrt((var(na.omit(heroes_vote[heroes_mc_party==0])))), 0),
    TRUE ~ NA_real_)) %>%
  # Row-wise from here on: each summary is computed across the bills of one respondent
  rowwise() %>%
  mutate(pol_congruence = mean(c(align_equality, align_minwage19, align_paycheck, align_promise, align_impeach_abuse, align_impeach_obstruction, align_cares, align_heroes), na.rm=TRUE),
         pol_congruence_na = mean(c(align_equality_na, align_minwage19_na, align_paycheck_na, align_promise_na, align_impeach_abuse_na, align_impeach_obstruction_na, align_cares_na, 
                                    align_heroes_na), na.rm=TRUE),
         pol_congruence_direction = mean(c(align_equality_direction, align_minwage19_direction, align_paycheck_direction, align_promise_direction, align_impeach_abuse_direction, align_impeach_obstruction_direction, align_cares_direction, 
                                           align_heroes_direction), na.rm=TRUE)) %>%
  # Using weights on policy incongruence measure
  # (weights are listed in the same bill order as the indicators)
  mutate(pol_congruence.wt = weighted.mean(c(align_equality, align_minwage19, align_paycheck, align_promise, 
                                             align_impeach_abuse, align_impeach_obstruction, align_cares, align_heroes),
                                           c(equality_weight, minwage19_weight, paycheck_weight, promise_weight,
                                             impeach_abuse_weight, impeach_obstruction_weight, cares_weight, heroes_weight), na.rm = TRUE),
         pol_congruence_na.wt = weighted.mean(c(align_equality_na, align_minwage19_na, align_paycheck_na, align_promise_na, 
                                                align_impeach_abuse_na, align_impeach_obstruction_na, align_cares_na, 
                                                align_heroes_na), 
                                              c(equality_weight, minwage19_weight, paycheck_weight, promise_weight,
                                                impeach_abuse_weight, impeach_obstruction_weight, cares_weight, heroes_weight), na.rm = TRUE),
         pol_congruence_direction.wt = weighted.mean(c(align_equality_direction, align_minwage19_direction, align_paycheck_direction, 
                                                       align_promise_direction, align_impeach_abuse_direction, 
                                                       align_impeach_obstruction_direction, align_cares_direction, 
                                                       align_heroes_direction),
                                                     c(equality_weight, minwage19_weight, paycheck_weight, promise_weight,
                                                       impeach_abuse_weight, impeach_obstruction_weight, cares_weight, heroes_weight), na.rm = TRUE)) %>%
  ungroup() %>%
  select(year, case_id, pol_congruence, pol_congruence_na, pol_congruence_direction,
         pol_congruence.wt, pol_congruence_na.wt, pol_congruence_direction.wt)


















## Bind even years and odd years ----
# Even years: stack the per-year congruence tables row-wise, then convert NaN
# in pol_congruence_na (e.g. a mean taken over nothing but NAs) to NA
mc_policy_evens <- do.call(
  rbind,
  list(cces_2008_sub, cces_2010_sub, cces_2012_sub, cces_2014_sub,
       cces_2016_sub, cces_2018_sub, cces_2020_sub)
) %>%
  mutate(pol_congruence_na = ifelse(is.nan(pol_congruence_na), NA, pol_congruence_na))

# Odd years: stack the per-year congruence tables row-wise, then convert NaN
# in pol_congruence_na (e.g. a mean taken over nothing but NAs) to NA
mc_policy_odds <- do.call(
  rbind,
  list(cces_2007_sub, cces_2009_sub, cces_2013_sub,
       cces_2015_sub, cces_2017_sub, cces_2019_sub)
) %>%
  mutate(pol_congruence_na = ifelse(is.nan(pol_congruence_na), NA, pol_congruence_na))

# Recode to incongruence ----
# reverse direction so that 0 indicates perfect congruence and 1 indicates perfect incongruence and create above/below 50% incongruence measures 
# Even years: reverse every congruence score with rescale(to = c(1, 0)), which
# maps the observed minimum to 1 and the observed maximum to 0, then add 0/1
# dummies from the reversed scores (NA stays NA).
mc_policy_evens2 <- mc_policy_evens %>%
  mutate(pol_congruence = rescale(pol_congruence, to=c(1,0)),
         pol_congruence_na = rescale(pol_congruence_na, to=c(1,0)),
         pol_congruence_direction = rescale(pol_congruence_direction, to=c(1,0)),
         pol_congruence.wt = rescale(pol_congruence.wt, to=c(1,0)),
         pol_congruence_na.wt = rescale(pol_congruence_na.wt, to=c(1,0)),
         # Rescale the weighted column itself; reading the unweighted
         # pol_congruence_direction here would discard the weighted measure
         pol_congruence_direction.wt = rescale(pol_congruence_direction.wt, to=c(1,0)),
         # Dummies are built from the already-reversed columns above
         pol_congruence_dummy = ifelse(pol_congruence >= 0.5, 1, ifelse(!is.na(pol_congruence), 0, NA)),
         pol_congruence_na_dummy = ifelse(pol_congruence_na >= 0.5, 1, ifelse(!is.na(pol_congruence_na), 0, NA)),
         pol_congruence_direction_dummy = ifelse(abs(pol_congruence_direction) > 0.5, 1, ifelse(!is.na(pol_congruence_direction), 0, NA)))

mc_policy_odds2 <- mc_policy_odds %>%
  mutate(pol_congruence = rescale(pol_congruence, to=c(1,0)),
         pol_congruence_na = rescale(pol_congruence_na, to=c(1,0)),
         pol_congruence_direction = rescale(pol_congruence_direction, to=c(1,0)),
         pol_congruence.wt = rescale(pol_congruence.wt, to=c(1,0)),
         pol_congruence_na.wt = rescale(pol_congruence_na.wt, to=c(1,0)),
         pol_congruence_direction.wt = rescale(pol_congruence_direction.wt, to=c(1,0)),
         pol_congruence_dummy = ifelse(pol_congruence >= 0.5, 1, ifelse(!is.na(pol_congruence), 0, NA)),
         pol_congruence_na_dummy = ifelse(pol_congruence_na >= 0.5, 1, ifelse(!is.na(pol_congruence_na), 0, NA)),
         pol_congruence_direction_dummy = ifelse(abs(pol_congruence_direction) > 0.5, 1, ifelse(!is.na(pol_congruence_direction), 0, NA))) %>%
  rename(year2 = year)

# Merge with datasets that have ideological congruence already ----
# Start from the latest respondent-level tables built earlier in the script.
df_odd <- df_odd7
df_even <- df_even7

# Attach the respondent-MC policy (in)congruence measures, keyed on survey
# year and CCES case id. The odd-year table carries its year as `year2`.
df_odd2 <- left_join(df_odd, mc_policy_odds2, by = c("year2", "case_id"))

df_even2 <- left_join(df_even, mc_policy_evens2, by = c("year", "case_id"))

## Generate Tercile Variables ----
# Cut each policy-incongruence score into three bins:
#   1: score <= .33,   2: .33 < score <= .67,   3: score > .67
# Missing scores stay NA.

# EVENS
# FIX: the middle bin of the two weighted terciles used to test the weighted
# score for the lower bound but the UNWEIGHTED score for the upper bound
# (e.g. pol_congruence.wt > .33 & pol_congruence <= .67). Rows where the two
# scores fell on different sides of .67 matched no branch and became NA.
# Each tercile is now built from a single variable.
df_even3 <- df_even2 %>%
  mutate(inc_tercile_pol = case_when(pol_congruence <= .33 ~ 1,
                                     pol_congruence > .33 & pol_congruence <= .67 ~ 2,
                                     pol_congruence > .67 ~ 3,
                                     TRUE ~ NA_real_),
         inc_tercile_pol_na = case_when(pol_congruence_na <= .33 ~ 1,
                                        pol_congruence_na > .33 & pol_congruence_na <= .67 ~ 2,
                                        pol_congruence_na > .67 ~ 3,
                                        TRUE ~ NA_real_),
         inc_tercile_pol.wt = case_when(pol_congruence.wt <= .33 ~ 1,
                                        pol_congruence.wt > .33 & pol_congruence.wt <= .67 ~ 2,
                                        pol_congruence.wt > .67 ~ 3,
                                        TRUE ~ NA_real_),
         inc_tercile_pol_na.wt = case_when(pol_congruence_na.wt <= .33 ~ 1,
                                           pol_congruence_na.wt > .33 & pol_congruence_na.wt <= .67 ~ 2,
                                           pol_congruence_na.wt > .67 ~ 3,
                                           TRUE ~ NA_real_))

# ODDS
# Cut each policy-incongruence score into three bins:
#   1: score <= .33,   2: .33 < score <= .67,   3: score > .67
# Missing scores stay NA.
# FIX: the middle bin of the two weighted terciles previously mixed the
# weighted score (lower bound) with the unweighted score (upper bound), so
# some rows matched no branch and were silently set to NA. Each tercile is now
# built from a single variable.
df_odd3 <- df_odd2 %>%
  mutate(inc_tercile_pol = case_when(pol_congruence <= .33 ~ 1,
                                     pol_congruence > .33 & pol_congruence <= .67 ~ 2,
                                     pol_congruence > .67 ~ 3,
                                     TRUE ~ NA_real_),
         inc_tercile_pol_na = case_when(pol_congruence_na <= .33 ~ 1,
                                        pol_congruence_na > .33 & pol_congruence_na <= .67 ~ 2,
                                        pol_congruence_na > .67 ~ 3,
                                        TRUE ~ NA_real_),
         inc_tercile_pol.wt = case_when(pol_congruence.wt <= .33 ~ 1,
                                        pol_congruence.wt > .33 & pol_congruence.wt <= .67 ~ 2,
                                        pol_congruence.wt > .67 ~ 3,
                                        TRUE ~ NA_real_),
         inc_tercile_pol_na.wt = case_when(pol_congruence_na.wt <= .33 ~ 1,
                                           pol_congruence_na.wt > .33 & pol_congruence_na.wt <= .67 ~ 2,
                                           pol_congruence_na.wt > .67 ~ 3,
                                           TRUE ~ NA_real_))


#### CANDIDATES JUST FOR EVEN YEAR DATA ####
# Build the inputs for modelling whether a respondent voted for the sitting MC
# (later broken down by gender/race).

# Continue from the tercile-augmented even-year table
df_even <- df_even3

# Get each MC's vote share from the preceding election ----
# Keep winning House candidates from the even-year elections 2006-2018.
df_candidates <- read.dta("./candidates_2006-2020.dta") %>%
  filter(office == "H") %>%
  filter(won == "1") %>%
  filter(year %in% as.character(seq(2006, 2018, by = 2)))

# Two-column lookup (postal abbreviation -> full state name) for merging
states <- data.frame(st_abbr = state.abb, state = state.name)

# The candidate file stores the abbreviation in `state`; move it to `st_abbr`
# and pull the full name in from the lookup.
df_candidates2 <- left_join(
  rename(df_candidates, st_abbr = state),
  states,
  by = "st_abbr"
)

# select variables for merging ----
df_candidates3 <- select(df_candidates2, state, dist, year, name_snyder, st_abbr)

# Attach the winner's name to each respondent by state, district and year
df_even2 <- left_join(df_even, df_candidates3,
                      by = c("state", "dist", "year", "st_abbr"))


# checking what the names look like 
#head(df_even2$name_snyder)
#head(df_even2$mc_name_last)

# Build `vote_winner`: did the respondent's chosen House candidate
# (`voted_rep_chosen`) win the seat (`name_snyder`)?
# Step 1: take what comes before the ", " in `name_snyder` as the winner's last
#         name and title-case it.
# Step 2: extract a last name from `voted_rep_chosen` and title-case it too.
# Step 3: compare the two last names.
df_even3 <- df_even2 %>%
  # "LAST, FIRST" -> snyder_last / snyder_first; the original column is kept
  separate(col = name_snyder, into = c("snyder_last", "snyder_first"), sep = ", ", remove = FALSE) %>%
  mutate(snyder_last = str_to_title(snyder_last)) %>%
  # need to generate a new name variable for just last name of "voted_rep_chosen"
  # split on single spaces into (up to) four pieces; the third piece is treated as
  # the last name unless one of the special cases below applies
  separate(col = voted_rep_chosen, into = c("voted_rep_first", "voted_rep_middle", "voted_rep_last", "voted_rep_last.v2"), sep = " ", remove = FALSE) %>%
  # - third piece is a party tag or "Jr."  -> the last name is the second piece
  # - third piece contains a double quote  -> the last name is the fourth piece
  #   (presumably a quoted nickname sits in third position -- TODO confirm)
  # - otherwise                            -> the third piece is the last name
  mutate(voted_rep_last2 = case_when(voted_rep_last %in% c("(R)", "(D)", "Jr.") ~ voted_rep_middle,
                                     grepl("\"",voted_rep_last) ~ voted_rep_last.v2,
                                     TRUE ~ voted_rep_last),
         # strip commas, then normalise capitalisation to match snyder_last
         voted_rep_last2 = gsub(",", "", voted_rep_last2),
         voted_rep_last2 = tolower(voted_rep_last2),
         voted_rep_last2 = str_to_title(voted_rep_last2)) %>%
  # Multi-word surnames such as "Van Hollen" end up as "Hollen" in voted_rep_last2
  # but remain "Van Hollen" in snyder_last.
  # NOTE(review): the original note here says the comparison should accept either an
  # exact match or voted_rep_last2 being contained in snyder_last, but the code below
  # tests exact equality only, so such names are coded 0 -- confirm intent.
  # vote_winner:
  # 1  if voted_rep_last2 equals snyder_last
  # 0  if they differ and voted_rep_last2 is not NA
  # NA if voted_rep_last2 is NA, or snyder_last is NA (the comparison is then NA
  #    and neither branch matches)
  mutate(vote_winner = case_when(voted_rep_last2 == snyder_last & !is.na(voted_rep_last2) ~ 1,
                                 voted_rep_last2 != snyder_last & !is.na(voted_rep_last2) ~ 0,
                                 TRUE ~ NA_real_)) 


#### ADDING RACIAL RESENTMENT AND HOSTILE SEXISM ####

# Continue from the table that now carries vote_winner
df_even <- df_even3

## Merging Racial resentment and hostile sexism (only have hostile sexism in 2018 and 2020)

# 2008  ----
# No racial-resentment or hostile-sexism items are used for 2008: keep only the
# keys and add all-NA placeholder columns so the year can be row-bound with
# the others.
cces_2008_sub <- cces_2008 %>%
  transmute(year = 2008,
            case_id = V100,
            rr1 = NA,
            rr2 = NA,
            rr3 = NA,
            rr4 = NA,
            rr_index = NA,
            rr_index_full = NA,
            hs1 = NA,
            hs2 = NA,
            hs3 = NA,
            hs4 = NA,
            hs_index = NA)

# 2010 ----
# CC422a Racial Resentment — "Irish, Italians, Jews..."   -> rr1 (reversed, to = c(1, 0))
# CC422b Racial Resentment — "Generations of slavery..."  -> rr2 (to = c(0, 1))
# rr3, rr4, rr_index_full and all hs* columns are NA placeholders so the year
# can be row-bound with the others.
cces_2010_sub <- cces_2010 %>%
  mutate(year = 2010) %>%
  select(year, V100, CC422a, CC422b) %>%
  # FIX: blank out codes >= 8 (presumably skipped / not asked -- confirm against
  # the codebook) BEFORE rescaling. rescale() maps the observed min/max of its
  # input onto the target range, so leaving those codes in the vector stretched
  # the range and compressed the valid answers. This is a no-op if no such
  # codes are present.
  mutate(CC422a = if_else(as.numeric(CC422a) < 8, as.numeric(CC422a), NA_real_),
         CC422b = if_else(as.numeric(CC422b) < 8, as.numeric(CC422b), NA_real_)) %>%
  mutate(rr1 = rescale(CC422a, to = c(1, 0)),
         rr2 = rescale(CC422b, to = c(0, 1)),
         rr3 = NA,
         rr4 = NA,
         # two-item index; NA whenever either item is missing (vectorised
         # equivalent of the former row-wise mean with na.rm = FALSE)
         rr_index = (rr1 + rr2) / 2,
         rr_index_full = NA,
         hs1 = NA,
         hs2 = NA,
         hs3 = NA,
         hs4 = NA,
         hs_index = NA) %>%
  rename(case_id = V100) %>%
  select(year, case_id, rr1, rr2, rr3, rr4, rr_index, rr_index_full, hs1, hs2, hs3, hs4, hs_index)

# 2012 ----
# CC422a Racial Resentment — "Irish, Italians, Jews..."   -> rr1 (reversed, to = c(1, 0))
# CC422b Racial Resentment — "Generations of slavery..."  -> rr2 (to = c(0, 1))
# rr3, rr4, rr_index_full and all hs* columns are NA placeholders so the year
# can be row-bound with the others.
cces_2012_sub <- cces_2012 %>%
  mutate(year = 2012) %>%
  select(year, V101, CC422a, CC422b) %>%
  # FIX: blank out codes >= 8 (presumably skipped / not asked -- confirm against
  # the codebook) BEFORE rescaling. rescale() maps the observed min/max of its
  # input onto the target range, so leaving those codes in the vector stretched
  # the range and compressed the valid answers. This is a no-op if no such
  # codes are present.
  mutate(CC422a = if_else(as.numeric(CC422a) < 8, as.numeric(CC422a), NA_real_),
         CC422b = if_else(as.numeric(CC422b) < 8, as.numeric(CC422b), NA_real_)) %>%
  mutate(rr1 = rescale(CC422a, to = c(1, 0)),
         rr2 = rescale(CC422b, to = c(0, 1)),
         rr3 = NA,
         rr4 = NA,
         # two-item index; NA whenever either item is missing (vectorised
         # equivalent of the former row-wise mean with na.rm = FALSE)
         rr_index = (rr1 + rr2) / 2,
         rr_index_full = NA,
         hs1 = NA,
         hs2 = NA,
         hs3 = NA,
         hs4 = NA,
         hs_index = NA) %>%
  rename(case_id = V101) %>%
  select(year, case_id, rr1, rr2, rr3, rr4, rr_index, rr_index_full, hs1, hs2, hs3, hs4, hs_index)


# 2014 ----
# CC422a Racial Resentment — "Irish, Italians, Jews..."   -> rr1 (reversed, to = c(1, 0))
# CC422b Racial Resentment — "Generations of slavery..."  -> rr2 (to = c(0, 1))
# rr3, rr4, rr_index_full and all hs* columns are NA placeholders so the year
# can be row-bound with the others.
cces_2014_sub <- cces_2014 %>%
  mutate(year = 2014) %>%
  select(year, V101, CC422a, CC422b) %>%
  # FIX: blank out codes >= 8 (presumably skipped / not asked -- confirm against
  # the codebook) BEFORE rescaling. rescale() maps the observed min/max of its
  # input onto the target range, so leaving those codes in the vector stretched
  # the range and compressed the valid answers. This is a no-op if no such
  # codes are present.
  mutate(CC422a = if_else(as.numeric(CC422a) < 8, as.numeric(CC422a), NA_real_),
         CC422b = if_else(as.numeric(CC422b) < 8, as.numeric(CC422b), NA_real_)) %>%
  mutate(rr1 = rescale(CC422a, to = c(1, 0)),
         rr2 = rescale(CC422b, to = c(0, 1)),
         rr3 = NA,
         rr4 = NA,
         # two-item index; NA whenever either item is missing (vectorised
         # equivalent of the former row-wise mean with na.rm = FALSE)
         rr_index = (rr1 + rr2) / 2,
         rr_index_full = NA,
         hs1 = NA,
         hs2 = NA,
         hs3 = NA,
         hs4 = NA,
         hs_index = NA) %>%
  rename(case_id = V101) %>%
  select(year, case_id, rr1, rr2, rr3, rr4, rr_index, rr_index_full, hs1, hs2, hs3, hs4, hs_index)


# 2016 ----
# The common-content file contributes only the keys plus all-NA hostile-sexism
# placeholders; the racial-resentment items come from a separate module file.
cces_2016_sub <- cces_2016 %>%
  transmute(year = 2016,
            case_id = V101,
            hs1 = NA,
            hs2 = NA,
            hs3 = NA,
            hs4 = NA,
            hs_index = NA)

# merge with racial resentment modules - Use Agadjanian Data
# Keep and rename the id plus the four items, then build the four-item and
# two-item indices row by row (NA whenever any contributing item is missing).
df_rr <- read.csv("./CCES Files/cces_2016_rr_modules.csv") %>%
  select(case_id = id,
         rr1 = favors,
         rr2 = slavery,
         rr3 = deserve,
         rr4 = harder) %>%
  rowwise() %>%
  mutate(rr_index_full = mean(c(rr1, rr2, rr3, rr4), na.rm = FALSE),
         rr_index = mean(c(rr1, rr2), na.rm = FALSE)) %>%
  ungroup() %>%
  select(case_id, rr1, rr2, rr3, rr4, rr_index, rr_index_full)

# Full join: respondents present in only one of the two sources are kept
cces_2016_sub <- df_rr %>%
  full_join(cces_2016_sub, by = "case_id")


# 2018 ----
# CC18_422e — Racial resentment, "Irish, Italians, Jewish..."          -> rr1 (reversed)
# CC18_422f — Racial resentment, "Generations of slavery..."           -> rr2
# CC18_422g — Racial resentment, "Less than they deserve"              -> rr3
# CC18_422h — "Some people not trying hard enough"                     -> rr4 (reversed)
# CC18_422c - "Sexism – When women lose to men in a fair competition, they typically complain about being discriminated against." -> hs1 (reversed)
# CC18_422d - "Sexism – Feminists are making entirely reasonable demands of men." -> hs2 (reversed)
# hs3 and hs4 are NA placeholders so the year can be row-bound with the others.
#
# NOTE(review): hs2 is reversed in the same direction as hs1, although agreeing
# with CC18_422d reads as the LESS sexist answer (compare how rr2/rr3 are
# handled relative to rr1/rr4). Left unchanged here -- confirm against the
# codebook whether hs2 should use to = c(0, 1).
cces_2018_sub <- cces_2018 %>%
  mutate(year = 2018) %>%
  select(year, caseid, CC18_422e, CC18_422f, CC18_422g, CC18_422h, CC18_422c, CC18_422d) %>%
  # FIX: blank out the out-of-scale codes (>= 8 for the rr items, >= 6 for the
  # hs items; presumably skipped / not asked -- confirm against the codebook)
  # BEFORE rescaling. rescale() maps the observed min/max of its input onto
  # the target range, so leaving those codes in the vector stretched the range
  # and compressed the valid answers. No-op if no such codes are present.
  mutate(CC18_422e = if_else(as.numeric(CC18_422e) < 8, as.numeric(CC18_422e), NA_real_),
         CC18_422f = if_else(as.numeric(CC18_422f) < 8, as.numeric(CC18_422f), NA_real_),
         CC18_422g = if_else(as.numeric(CC18_422g) < 8, as.numeric(CC18_422g), NA_real_),
         CC18_422h = if_else(as.numeric(CC18_422h) < 8, as.numeric(CC18_422h), NA_real_),
         CC18_422c = if_else(as.numeric(CC18_422c) < 6, as.numeric(CC18_422c), NA_real_),
         CC18_422d = if_else(as.numeric(CC18_422d) < 6, as.numeric(CC18_422d), NA_real_)) %>%
  mutate(rr1 = rescale(CC18_422e, to = c(1, 0)),
         rr2 = rescale(CC18_422f, to = c(0, 1)),
         rr3 = rescale(CC18_422g, to = c(0, 1)),
         rr4 = rescale(CC18_422h, to = c(1, 0)),
         # indices are NA whenever any contributing item is missing (vectorised
         # equivalent of the former row-wise means with na.rm = FALSE)
         rr_index_full = (rr1 + rr2 + rr3 + rr4) / 4,
         rr_index = (rr1 + rr2) / 2,
         hs1 = rescale(CC18_422c, to = c(1, 0)),
         hs2 = rescale(CC18_422d, to = c(1, 0)),
         hs3 = NA,
         hs4 = NA,
         hs_index = (hs1 + hs2) / 2) %>%
  rename(case_id = caseid) %>%
  select(year, case_id, rr1, rr2, rr3, rr4, rr_index, rr_index_full, hs1, hs2, hs3, hs4, hs_index)

# 2020 ----
# CC20_441a Italians     -> rr1 (reversed, to = c(1, 0))
# CC20_441b Generations  -> rr2 (to = c(0, 1))
# CC20_440c - "Women seek to gain power by getting control over men." -> hs3 (reversed)
# CC20_440d - "Women are too easily offended."                        -> hs4 (reversed)
# rr3, rr4, rr_index_full, hs1 and hs2 are NA placeholders so the year can be
# row-bound with the others.
cces_2020_sub <- cces_2020 %>%
  mutate(year = 2020) %>%
  select(year, caseid, CC20_441a, CC20_441b, CC20_440c, CC20_440d) %>%
  # FIX: blank out out-of-scale codes BEFORE rescaling. rescale() maps the
  # observed min/max of its input onto the target range, so any such code left
  # in the vector stretches the range and compresses the valid answers. The
  # hs items already had a `< 6` guard, but it was applied after rescaling;
  # the rr items get the same `< 8` guard used for the other survey years.
  # Both are no-ops if no such codes are present in the file.
  mutate(CC20_441a = if_else(as.numeric(CC20_441a) < 8, as.numeric(CC20_441a), NA_real_),
         CC20_441b = if_else(as.numeric(CC20_441b) < 8, as.numeric(CC20_441b), NA_real_),
         CC20_440c = if_else(as.numeric(CC20_440c) < 6, as.numeric(CC20_440c), NA_real_),
         CC20_440d = if_else(as.numeric(CC20_440d) < 6, as.numeric(CC20_440d), NA_real_)) %>%
  mutate(rr1 = rescale(CC20_441a, to = c(1, 0)),
         rr2 = rescale(CC20_441b, to = c(0, 1)),
         rr3 = NA,
         rr4 = NA,
         # two-item index; NA whenever either item is missing (vectorised
         # equivalent of the former row-wise mean with na.rm = FALSE)
         rr_index = (rr1 + rr2) / 2,
         rr_index_full = NA,
         hs1 = NA,
         hs2 = NA,
         hs3 = rescale(CC20_440c, to = c(1, 0)),
         hs4 = rescale(CC20_440d, to = c(1, 0)),
         hs_index = (hs3 + hs4) / 2) %>%
  rename(case_id = caseid) %>%
  select(year, case_id, rr1, rr2, rr3, rr4, rr_index, rr_index_full, hs1, hs2, hs3, hs4, hs_index)


# Bind all years ----
# Stack the per-year racial-resentment / hostile-sexism tables
mc_rr_evens <- rbind(
  cces_2008_sub, cces_2010_sub, cces_2012_sub, cces_2014_sub,
  cces_2016_sub, cces_2018_sub, cces_2020_sub
)

# Merge to data ----
# Attach the attitude measures to the even-year respondent table by survey
# year and CCES case id
df_even2 <- left_join(df_even, mc_rr_evens, by = c("year", "case_id"))

#### CUTTING VARIABLES I DIDN'T USE IN THE ANALYSIS ####

# Keep the complete merged table under its own name before trimming
df_even.full <- df_even2

# Analysis file: only the columns used in the paper, in this order
df_even3 <- select(
  df_even.full,
  year, case_id, weight, st, cong, dist, gender,
  approval_rep, approval_sen1, party3, party7,
  approval_rep_na, approval_rep_med, approval_rep_mean,
  mc_party, mc_gender, nonwhite_mc, mc_seniority, name_full,
  mc_id, mc_effectiveness, state_dist,
  nonwhite, black, hispanic, asian,
  lead_gender_mc, lead_nonwhite_mc,
  black_mc_nw, hispanic_mc_nw, asian_mc_nw,
  knowledge2,
  female_congruence, male_congruence, gender_congruence,
  white_congruence, nonwhite_and_racecong, race_congruence,
  lag_race_congruent, lag_gen_congruent,
  lead_race_congruent, lead_gen_congruent,
  lag2_race_congruent, lag2_gen_congruent,
  lead2_race_congruent, lead2_gen_congruent,
  incongruence_ideo2, incongruence_ideo2.2,
  inc_tercile_ideo1, inc_tercile_ideo2,
  inc_tercile_pol, inc_tercile_pol.wt,
  vote_winner, rr_index, hs_index
)

#### WRITE EVEN AND ODD FILES (Only use even in the paper) ####

#write.csv(df_even3, "mc_cces_evens_final2.csv", row.names = FALSE)
#write.csv(df_odd3, "mc_cces_odds3.csv", row.names = FALSE)









